import sys
import os
import time
import numpy as np
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
from cuml.linear_model import LogisticRegression as cuMLLogisticRegression
# Put the directory named by the NOVA_HOME environment variable on the module
# search path (presumably so the `from utils import *` below resolves from the
# NOVA checkout - confirm against the project layout).
nova_home = os.getenv('NOVA_HOME')
print('NOVA_HOME is at', nova_home)
if nova_home:
    sys.path.insert(1, nova_home)
else:
    # os.getenv() returns None when the variable is unset. Inserting None into
    # sys.path does not fail here; the entry is just skipped by the import
    # system, so the problem would only surface later as a confusing import
    # error (or a different `utils` being picked up). An empty value is treated
    # the same way, because '' on sys.path means "current directory".
    print(
        'WARNING: NOVA_HOME is not set; sys.path was left unchanged and '
        'project imports may fail.',
        file=sys.stderr,
    )
%load_ext autoreload
%autoreload 2
from utils import *
NOVA_HOME is at /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
import itertools
from cuml.linear_model import LogisticRegression as cuLogisticRegression
# Settings handed to run_baseline_model() to locate the embeddings to evaluate.
dataset_config = dict(
    # Model output folder that holds the embeddings (the loader logs this same
    # path as `model_output_folder`).
    path_to_embeddings=(
        "/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/"
        "finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen"
    ),
    # Logged by the loader as `multiplex=False`.
    multiplexed=False,
    # Template with a `{batch}` placeholder - presumably filled in once per
    # batch number to pick the matching config; TODO confirm in utils.
    config_fmt="newNeuronsD8FigureConfig_UMAP1_B{batch}",
    # Presumably the location (relative to the project root) of those configs.
    config_dir="manuscript/manuscript_figures_data_config",
)
## Baseline
# Baseline evaluation with every optional preprocessing switch turned off.
# Judging by the logged output of this cell, each batch listed in `batches` is
# held out in turn as the test set while the remaining batches form the
# training set, and a classification report is printed per held-out batch.
run_baseline_model(
    dataset_config=dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    # Preprocessing / feature-selection options: all disabled for the baseline.
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,  # presumably only used when choose_features=True - TODO confirm
    label_map=None,
    # GPU (cuML) logistic regression; the empty kwargs dict passes no extra
    # constructor options.
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},
)
2025-08-19 11:59:17 INFO: [load_embeddings] multiplex=False 2025-08-19 11:59:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 11:59:17 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 11:59:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 11:59:21 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 11:59:22 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 11:59:23 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 11:59:23 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-19 11:59:23 INFO: [load_embeddings] labels shape: (141079,) 2025-08-19 11:59:23 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-19 11:59:23 INFO: [load_embeddings] paths shape: (141079,) 2025-08-19 11:59:23 INFO: [load_embeddings] multiplex=False 2025-08-19 11:59:23 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 11:59:23 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 11:59:23 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 11:59:27 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 11:59:29 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 11:59:30 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 11:59:30 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-19 11:59:30 INFO: [load_embeddings] labels shape: (134336,) 2025-08-19 11:59:30 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-19 11:59:30 INFO: [load_embeddings] paths shape: (134336,) 2025-08-19 11:59:30 INFO: [load_embeddings] multiplex=False 2025-08-19 11:59:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 
11:59:30 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 11:59:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 11:59:35 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 11:59:37 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 11:59:38 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 11:59:39 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-19 11:59:39 INFO: [load_embeddings] labels shape: (189079,) 2025-08-19 11:59:39 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 11:59:39 INFO: [load_embeddings] paths shape: (189079,) 2025-08-19 11:59:39 INFO: [load_embeddings] multiplex=False 2025-08-19 11:59:39 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 11:59:39 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 11:59:39 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 11:59:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 11:59:47 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 11:59:48 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 11:59:49 INFO: [load_embeddings] embeddings shape: (169304, 192) 2025-08-19 11:59:49 INFO: [load_embeddings] labels shape: (169304,) 2025-08-19 11:59:49 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-19 11:59:49 INFO: [load_embeddings] paths shape: (169304,) 2025-08-19 11:59:49 INFO: [load_embeddings] multiplex=False 2025-08-19 11:59:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 11:59:49 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 11:59:49 INFO: [load_embeddings] model_output_folder = 
/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 11:59:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 11:59:56 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 11:59:58 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 11:59:58 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-08-19 11:59:58 INFO: [load_embeddings] labels shape: (196652,) 2025-08-19 11:59:58 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-19 11:59:58 INFO: [load_embeddings] paths shape: (196652,) 2025-08-19 11:59:59 INFO: [load_embeddings] multiplex=False 2025-08-19 11:59:59 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 11:59:59 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 11:59:59 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:00:04 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:00:06 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:00:08 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:00:08 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-19 12:00:08 INFO: [load_embeddings] labels shape: (196119,) 2025-08-19 12:00:08 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:00:08 INFO: [load_embeddings] paths shape: (196119,)
=== Batch 1 === Train: (830450, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] Test: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] HNRNPA1_WT_Untreated: 24526 NONO_WT_Untreated: 20846 KIF5A_WT_Untreated: 20974 TOMM20_WT_Untreated: 21633 DAPI_WT_Untreated: 271498 DCP1A_WT_Untreated: 25887 LSM14A_WT_Untreated: 25124 ANXA11_WT_Untreated: 19584 PSD95_WT_Untreated: 21489 NCL_WT_Untreated: 25750 TDP43_WT_Untreated: 19716 Phalloidin_WT_Untreated: 22312 SQSTM1_WT_Untreated: 19138 SNCA_WT_Untreated: 11037 TIA1_WT_Untreated: 16541 GM130_WT_Untreated: 25126 Calreticulin_WT_Untreated: 25894 PEX14_WT_Untreated: 20145 PML_WT_Untreated: 19937 G3BP1_WT_Untreated: 9447 Tubulin_WT_Untreated: 24440 FUS_WT_Untreated: 19636 CLTC_WT_Untreated: 24225 NEMO_WT_Untreated: 24282 SON_WT_Untreated: 22340 PURA_WT_Untreated: 8626 mitotracker_WT_Untreated: 21876 LAMP1_WT_Untreated: 9717 FMRP_WT_Untreated: 8704
2025-08-19 12:00:32 INFO: [load_embeddings] multiplex=False 2025-08-19 12:00:32 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:00:32 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 12:00:32 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
precision recall f1-score support
0 0.99 0.98 0.98 4777
1 0.95 0.96 0.96 5749
2 0.97 0.98 0.97 6639
3 1.00 1.00 1.00 63181
4 0.96 0.95 0.96 5177
5 0.97 0.97 0.97 2678
6 0.99 0.98 0.98 6586
7 0.98 0.92 0.95 2689
8 0.98 0.99 0.99 6376
9 0.98 0.99 0.98 6023
10 0.95 0.97 0.96 4799
11 0.97 0.89 0.93 1561
12 0.97 0.99 0.98 6008
13 1.00 0.99 0.99 6573
14 0.99 0.99 0.99 5115
15 0.90 0.98 0.93 4542
16 0.99 0.99 0.99 4830
17 0.99 1.00 1.00 4895
18 0.75 0.97 0.84 5739
19 0.96 0.91 0.93 2650
20 0.96 0.97 0.96 5431
21 0.97 0.94 0.95 2368
22 0.99 1.00 0.99 4987
23 0.79 0.48 0.60 4527
24 0.97 0.90 0.94 4683
25 0.99 0.99 0.99 3899
26 0.96 0.94 0.95 4400
27 0.66 0.91 0.76 4873
28 0.91 0.59 0.72 4364
accuracy 0.96 196119
macro avg 0.95 0.93 0.94 196119
weighted avg 0.96 0.96 0.96 196119
2025-08-19 12:00:37 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:00:39 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:00:41 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:00:41 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-19 12:00:41 INFO: [load_embeddings] labels shape: (196119,) 2025-08-19 12:00:41 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:00:41 INFO: [load_embeddings] paths shape: (196119,) 2025-08-19 12:00:41 INFO: [load_embeddings] multiplex=False 2025-08-19 12:00:41 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:00:41 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 12:00:41 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:00:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:00:47 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:00:48 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:00:48 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-19 12:00:48 INFO: [load_embeddings] labels shape: (134336,) 2025-08-19 12:00:48 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-19 12:00:48 INFO: [load_embeddings] paths shape: (134336,) 2025-08-19 12:00:48 INFO: [load_embeddings] multiplex=False 2025-08-19 12:00:48 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:00:48 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 12:00:48 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:00:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:00:56 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-19 12:00:57 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:00:57 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-19 12:00:57 INFO: [load_embeddings] labels shape: (189079,) 2025-08-19 12:00:57 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:00:57 INFO: [load_embeddings] paths shape: (189079,) 2025-08-19 12:00:58 INFO: [load_embeddings] multiplex=False 2025-08-19 12:00:58 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:00:58 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 12:00:58 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:01:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:01:06 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:01:07 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:01:08 INFO: [load_embeddings] embeddings shape: (169304, 192) 2025-08-19 12:01:08 INFO: [load_embeddings] labels shape: (169304,) 2025-08-19 12:01:08 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-19 12:01:08 INFO: [load_embeddings] paths shape: (169304,) 2025-08-19 12:01:08 INFO: [load_embeddings] multiplex=False 2025-08-19 12:01:08 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:01:08 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 12:01:08 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:01:14 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:01:16 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:01:17 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:01:18 INFO: [load_embeddings] embeddings shape: 
(196652, 192) 2025-08-19 12:01:18 INFO: [load_embeddings] labels shape: (196652,) 2025-08-19 12:01:18 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-19 12:01:18 INFO: [load_embeddings] paths shape: (196652,) 2025-08-19 12:01:18 INFO: [load_embeddings] multiplex=False 2025-08-19 12:01:18 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:01:18 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 12:01:18 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:01:22 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:01:24 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:01:25 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:01:25 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-19 12:01:25 INFO: [load_embeddings] labels shape: (141079,) 2025-08-19 12:01:25 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-19 12:01:25 INFO: [load_embeddings] paths shape: (141079,)
=== Batch 2 === Train: (885490, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] Test: (141079, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] DAPI_WT_Untreated: 289648 PEX14_WT_Untreated: 21109 Calreticulin_WT_Untreated: 29143 TDP43_WT_Untreated: 20703 TIA1_WT_Untreated: 16825 TOMM20_WT_Untreated: 22332 HNRNPA1_WT_Untreated: 26333 KIF5A_WT_Untreated: 21917 LSM14A_WT_Untreated: 26961 SON_WT_Untreated: 22776 CLTC_WT_Untreated: 25663 GM130_WT_Untreated: 28274 NEMO_WT_Untreated: 25445 Tubulin_WT_Untreated: 25416 PURA_WT_Untreated: 10490 FMRP_WT_Untreated: 10444 DCP1A_WT_Untreated: 27007 NONO_WT_Untreated: 20846 PML_WT_Untreated: 21075 mitotracker_WT_Untreated: 22238 SQSTM1_WT_Untreated: 20119 ANXA11_WT_Untreated: 20574 Phalloidin_WT_Untreated: 23886 G3BP1_WT_Untreated: 11183 PSD95_WT_Untreated: 22936 FUS_WT_Untreated: 22609 SNCA_WT_Untreated: 10936 NCL_WT_Untreated: 28699 LAMP1_WT_Untreated: 9903
2025-08-19 12:01:33 INFO: [load_embeddings] multiplex=False 2025-08-19 12:01:33 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:01:33 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 12:01:33 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
precision recall f1-score support
0 0.95 0.99 0.97 3787
1 0.95 0.95 0.95 4311
2 0.89 0.88 0.88 3390
3 1.00 1.00 1.00 45031
4 0.97 0.93 0.95 4057
5 0.94 0.97 0.95 938
6 0.99 0.91 0.95 3613
7 0.91 0.96 0.94 953
8 0.98 0.97 0.97 3228
9 0.93 0.99 0.96 4216
10 0.96 0.97 0.96 3856
11 0.95 0.91 0.93 1375
12 0.94 0.98 0.96 4171
13 0.99 0.98 0.98 3624
14 0.99 0.99 0.99 3952
15 0.89 0.91 0.90 4542
16 0.99 0.99 0.99 3866
17 0.99 1.00 1.00 3757
18 0.96 0.79 0.87 4292
19 0.91 0.93 0.92 786
20 0.94 0.88 0.91 3857
21 0.96 0.92 0.94 2469
22 0.99 1.00 0.99 4551
23 0.65 0.83 0.73 3546
24 0.89 0.86 0.88 3696
25 1.00 0.98 0.99 3615
26 0.93 0.94 0.93 3701
27 0.88 0.66 0.75 3897
28 0.79 0.98 0.87 4002
accuracy 0.95 141079
macro avg 0.93 0.93 0.93 141079
weighted avg 0.95 0.95 0.95 141079
2025-08-19 12:01:38 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:01:40 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:01:41 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:01:42 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-19 12:01:42 INFO: [load_embeddings] labels shape: (196119,) 2025-08-19 12:01:42 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:01:42 INFO: [load_embeddings] paths shape: (196119,) 2025-08-19 12:01:42 INFO: [load_embeddings] multiplex=False 2025-08-19 12:01:42 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:01:42 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 12:01:42 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:01:46 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:01:47 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:01:48 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:01:48 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-19 12:01:48 INFO: [load_embeddings] labels shape: (141079,) 2025-08-19 12:01:48 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-19 12:01:48 INFO: [load_embeddings] paths shape: (141079,) 2025-08-19 12:01:48 INFO: [load_embeddings] multiplex=False 2025-08-19 12:01:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:01:49 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 12:01:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:01:53 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:01:55 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-19 12:01:57 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:01:57 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-19 12:01:57 INFO: [load_embeddings] labels shape: (189079,) 2025-08-19 12:01:57 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:01:57 INFO: [load_embeddings] paths shape: (189079,) 2025-08-19 12:01:57 INFO: [load_embeddings] multiplex=False 2025-08-19 12:01:57 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:01:57 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 12:01:57 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:02:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:02:05 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:02:07 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:02:07 INFO: [load_embeddings] embeddings shape: (169304, 192) 2025-08-19 12:02:07 INFO: [load_embeddings] labels shape: (169304,) 2025-08-19 12:02:07 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-19 12:02:07 INFO: [load_embeddings] paths shape: (169304,) 2025-08-19 12:02:07 INFO: [load_embeddings] multiplex=False 2025-08-19 12:02:07 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:02:07 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 12:02:07 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:02:13 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:02:15 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:02:16 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:02:16 INFO: [load_embeddings] embeddings shape: 
(196652, 192) 2025-08-19 12:02:17 INFO: [load_embeddings] labels shape: (196652,) 2025-08-19 12:02:17 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-19 12:02:17 INFO: [load_embeddings] paths shape: (196652,) 2025-08-19 12:02:17 INFO: [load_embeddings] multiplex=False 2025-08-19 12:02:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:02:17 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 12:02:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:02:21 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:02:22 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:02:23 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:02:24 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-19 12:02:24 INFO: [load_embeddings] labels shape: (134336,) 2025-08-19 12:02:24 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-19 12:02:24 INFO: [load_embeddings] paths shape: (134336,)
=== Batch 3 === Train: (892233, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] Test: (134336, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] DAPI_WT_Untreated: 291501 PEX14_WT_Untreated: 21028 Calreticulin_WT_Untreated: 28546 TDP43_WT_Untreated: 20828 TIA1_WT_Untreated: 16892 TOMM20_WT_Untreated: 22202 HNRNPA1_WT_Untreated: 26716 KIF5A_WT_Untreated: 22810 LSM14A_WT_Untreated: 27335 SON_WT_Untreated: 23673 CLTC_WT_Untreated: 26550 GM130_WT_Untreated: 27676 NEMO_WT_Untreated: 25953 Tubulin_WT_Untreated: 26110 PURA_WT_Untreated: 9719 FMRP_WT_Untreated: 9628 DCP1A_WT_Untreated: 27588 NONO_WT_Untreated: 21731 PML_WT_Untreated: 21828 mitotracker_WT_Untreated: 22251 SQSTM1_WT_Untreated: 20505 ANXA11_WT_Untreated: 20494 Phalloidin_WT_Untreated: 24168 G3BP1_WT_Untreated: 10290 PSD95_WT_Untreated: 23714 FUS_WT_Untreated: 22519 SNCA_WT_Untreated: 10991 NCL_WT_Untreated: 28627 LAMP1_WT_Untreated: 10360
2025-08-19 12:02:32 INFO: [load_embeddings] multiplex=False 2025-08-19 12:02:32 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:02:32 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 12:02:32 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
precision recall f1-score support
0 0.95 0.99 0.97 3867
1 0.87 0.95 0.91 3424
2 0.98 0.88 0.93 3987
3 1.00 1.00 1.00 43178
4 0.93 0.90 0.91 3476
5 0.91 0.94 0.92 1754
6 0.99 0.98 0.99 3703
7 0.94 0.87 0.90 1846
8 0.98 0.96 0.97 3826
9 0.99 0.99 0.99 3833
10 0.97 0.92 0.94 2963
11 0.94 0.93 0.93 918
12 0.94 0.97 0.95 3797
13 0.99 0.99 0.99 3696
14 0.97 0.98 0.98 3444
15 0.96 0.76 0.85 3657
16 0.97 0.99 0.98 3947
17 0.99 0.99 0.99 3004
18 0.95 0.94 0.94 3514
19 0.90 0.89 0.90 1557
20 0.77 0.88 0.83 3575
21 0.92 0.95 0.93 2414
22 0.99 1.00 0.99 3654
23 0.66 0.59 0.62 3160
24 0.80 0.97 0.87 3571
25 0.98 0.99 0.98 3548
26 0.79 0.97 0.87 3831
27 0.80 0.67 0.73 3203
28 0.87 0.77 0.82 3989
accuracy 0.94 134336
macro avg 0.92 0.92 0.92 134336
weighted avg 0.94 0.94 0.94 134336
2025-08-19 12:02:38 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:02:40 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:02:41 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:02:42 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-19 12:02:42 INFO: [load_embeddings] labels shape: (196119,) 2025-08-19 12:02:42 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:02:42 INFO: [load_embeddings] paths shape: (196119,) 2025-08-19 12:02:42 INFO: [load_embeddings] multiplex=False 2025-08-19 12:02:42 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:02:42 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 12:02:42 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:02:46 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:02:47 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:02:48 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:02:49 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-19 12:02:49 INFO: [load_embeddings] labels shape: (141079,) 2025-08-19 12:02:49 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-19 12:02:49 INFO: [load_embeddings] paths shape: (141079,) 2025-08-19 12:02:49 INFO: [load_embeddings] multiplex=False 2025-08-19 12:02:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:02:49 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 12:02:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:02:53 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:02:54 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-19 12:02:55 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:02:56 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-19 12:02:56 INFO: [load_embeddings] labels shape: (134336,) 2025-08-19 12:02:56 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-19 12:02:56 INFO: [load_embeddings] paths shape: (134336,) 2025-08-19 12:02:56 INFO: [load_embeddings] multiplex=False 2025-08-19 12:02:56 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:02:56 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 12:02:56 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:03:02 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:03:04 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:03:06 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:03:06 INFO: [load_embeddings] embeddings shape: (169304, 192) 2025-08-19 12:03:06 INFO: [load_embeddings] labels shape: (169304,) 2025-08-19 12:03:06 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-19 12:03:06 INFO: [load_embeddings] paths shape: (169304,) 2025-08-19 12:03:06 INFO: [load_embeddings] multiplex=False 2025-08-19 12:03:06 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:03:06 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 12:03:06 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:03:12 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:03:14 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:03:16 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:03:16 INFO: [load_embeddings] embeddings shape: 
(196652, 192) 2025-08-19 12:03:16 INFO: [load_embeddings] labels shape: (196652,) 2025-08-19 12:03:16 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-19 12:03:16 INFO: [load_embeddings] paths shape: (196652,) 2025-08-19 12:03:16 INFO: [load_embeddings] multiplex=False 2025-08-19 12:03:16 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:03:16 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 12:03:16 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:03:22 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:03:23 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:03:25 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:03:25 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-19 12:03:25 INFO: [load_embeddings] labels shape: (189079,) 2025-08-19 12:03:25 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:03:25 INFO: [load_embeddings] paths shape: (189079,)
=== Batch 7 === Train: (837490, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] Test: (189079, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] DAPI_WT_Untreated: 270193 PEX14_WT_Untreated: 20964 Calreticulin_WT_Untreated: 26534 TDP43_WT_Untreated: 19874 TIA1_WT_Untreated: 17201 TOMM20_WT_Untreated: 21012 HNRNPA1_WT_Untreated: 25487 KIF5A_WT_Untreated: 20280 LSM14A_WT_Untreated: 25505 SON_WT_Untreated: 21753 CLTC_WT_Untreated: 23804 GM130_WT_Untreated: 25432 NEMO_WT_Untreated: 25027 Tubulin_WT_Untreated: 22723 PURA_WT_Untreated: 9113 FMRP_WT_Untreated: 9656 DCP1A_WT_Untreated: 25278 NONO_WT_Untreated: 20764 PML_WT_Untreated: 19880 mitotracker_WT_Untreated: 21332 SQSTM1_WT_Untreated: 18580 ANXA11_WT_Untreated: 19572 Phalloidin_WT_Untreated: 22211 G3BP1_WT_Untreated: 10348 PSD95_WT_Untreated: 22164 FUS_WT_Untreated: 26185 SNCA_WT_Untreated: 11279 NCL_WT_Untreated: 26158 LAMP1_WT_Untreated: 9181
2025-08-19 12:03:43 INFO: [load_embeddings] multiplex=False 2025-08-19 12:03:43 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:03:43 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 12:03:43 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
precision recall f1-score support
0 0.98 0.96 0.97 4789
1 0.70 0.95 0.80 6170
2 0.80 0.75 0.77 5999
3 1.00 1.00 1.00 64486
4 0.96 0.82 0.88 5786
5 0.95 0.94 0.95 1726
6 0.00 0.00 0.00 37
7 0.92 0.97 0.94 1788
8 0.46 0.55 0.50 6070
9 0.98 0.05 0.10 5062
10 0.97 0.96 0.96 5493
11 0.63 0.97 0.76 2097
12 0.97 0.61 0.75 5627
13 0.96 0.09 0.17 6165
14 0.93 0.98 0.95 4370
15 0.95 0.92 0.94 4624
16 0.98 0.98 0.98 4011
17 0.66 1.00 0.79 4952
18 0.90 0.97 0.93 5064
19 0.90 0.95 0.92 2163
20 0.94 0.95 0.95 5532
21 0.67 0.89 0.77 2126
22 0.94 1.00 0.97 5574
23 0.57 0.62 0.59 5085
24 0.92 0.88 0.90 4525
25 0.94 0.97 0.96 3239
26 0.98 0.93 0.95 5021
27 0.73 0.82 0.77 6590
28 0.94 0.78 0.86 4908
accuracy 0.87 189079
macro avg 0.84 0.80 0.79 189079
weighted avg 0.91 0.87 0.86 189079
2025-08-19 12:03:49 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:03:51 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:03:53 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:03:54 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-19 12:03:54 INFO: [load_embeddings] labels shape: (196119,) 2025-08-19 12:03:54 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:03:54 INFO: [load_embeddings] paths shape: (196119,) 2025-08-19 12:03:54 INFO: [load_embeddings] multiplex=False 2025-08-19 12:03:54 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:03:54 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 12:03:54 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:03:58 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:03:59 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:04:00 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:04:01 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-19 12:04:01 INFO: [load_embeddings] labels shape: (141079,) 2025-08-19 12:04:01 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-19 12:04:01 INFO: [load_embeddings] paths shape: (141079,) 2025-08-19 12:04:01 INFO: [load_embeddings] multiplex=False 2025-08-19 12:04:01 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:04:01 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 12:04:01 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:04:05 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:04:07 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-19 12:04:08 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:04:09 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-19 12:04:09 INFO: [load_embeddings] labels shape: (134336,) 2025-08-19 12:04:09 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-19 12:04:09 INFO: [load_embeddings] paths shape: (134336,) 2025-08-19 12:04:09 INFO: [load_embeddings] multiplex=False 2025-08-19 12:04:09 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:04:09 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 12:04:09 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:04:14 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:04:17 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:04:18 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:04:19 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-19 12:04:19 INFO: [load_embeddings] labels shape: (189079,) 2025-08-19 12:04:19 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:04:19 INFO: [load_embeddings] paths shape: (189079,) 2025-08-19 12:04:19 INFO: [load_embeddings] multiplex=False 2025-08-19 12:04:19 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:04:19 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 12:04:19 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:04:25 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:04:27 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:04:29 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:04:30 INFO: [load_embeddings] embeddings shape: 
(196652, 192) 2025-08-19 12:04:30 INFO: [load_embeddings] labels shape: (196652,) 2025-08-19 12:04:30 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-19 12:04:30 INFO: [load_embeddings] paths shape: (196652,) 2025-08-19 12:04:30 INFO: [load_embeddings] multiplex=False 2025-08-19 12:04:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:04:30 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 12:04:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:04:37 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:04:39 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:04:41 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:04:41 INFO: [load_embeddings] embeddings shape: (169304, 192) 2025-08-19 12:04:41 INFO: [load_embeddings] labels shape: (169304,) 2025-08-19 12:04:41 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-19 12:04:41 INFO: [load_embeddings] paths shape: (169304,)
=== Batch 8 === Train: (857265, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] Test: (169304, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28] DAPI_WT_Untreated: 278916 PEX14_WT_Untreated: 20900 Calreticulin_WT_Untreated: 26659 TDP43_WT_Untreated: 20633 TIA1_WT_Untreated: 17450 TOMM20_WT_Untreated: 22224 HNRNPA1_WT_Untreated: 25041 KIF5A_WT_Untreated: 22126 LSM14A_WT_Untreated: 25648 SON_WT_Untreated: 22072 CLTC_WT_Untreated: 26025 GM130_WT_Untreated: 25865 NEMO_WT_Untreated: 23656 Tubulin_WT_Untreated: 24531 PURA_WT_Untreated: 9460 FMRP_WT_Untreated: 9449 DCP1A_WT_Untreated: 25323 NONO_WT_Untreated: 20689 PML_WT_Untreated: 21457 mitotracker_WT_Untreated: 22540 SQSTM1_WT_Untreated: 20396 ANXA11_WT_Untreated: 20291 Phalloidin_WT_Untreated: 23674 G3BP1_WT_Untreated: 9971 PSD95_WT_Untreated: 24965 FUS_WT_Untreated: 20373 SNCA_WT_Untreated: 11895 NCL_WT_Untreated: 26475 LAMP1_WT_Untreated: 8561
2025-08-19 12:05:03 INFO: [load_embeddings] multiplex=False 2025-08-19 12:05:03 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:05:03 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 12:05:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
precision recall f1-score support
0 0.96 0.97 0.97 4070
1 0.94 0.73 0.82 3949
2 0.94 0.90 0.92 5874
3 1.00 1.00 1.00 55763
4 0.96 0.97 0.97 5741
5 0.85 0.97 0.90 1933
6 0.97 0.56 0.71 5849
7 0.93 0.96 0.95 2165
8 0.98 0.69 0.81 5637
9 0.67 0.99 0.80 5508
10 0.94 0.97 0.96 3647
11 0.97 0.91 0.94 2717
12 0.94 0.97 0.96 5484
13 0.93 0.98 0.95 5848
14 0.99 1.00 0.99 5741
15 0.93 0.94 0.94 4699
16 0.89 0.98 0.93 4075
17 0.93 0.99 0.96 3375
18 0.86 0.95 0.90 2263
19 0.89 0.96 0.92 1816
20 0.90 0.93 0.91 4069
21 0.90 0.94 0.92 1510
22 0.95 1.00 0.97 5255
23 0.62 0.66 0.64 3269
24 0.93 0.90 0.92 3766
25 0.98 0.95 0.97 2990
26 0.85 0.94 0.89 3809
27 0.79 0.72 0.75 4782
28 0.85 0.89 0.87 3700
accuracy 0.93 169304
macro avg 0.91 0.91 0.90 169304
weighted avg 0.94 0.93 0.93 169304
2025-08-19 12:05:08 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:05:10 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:05:12 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:05:13 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-19 12:05:13 INFO: [load_embeddings] labels shape: (196119,) 2025-08-19 12:05:13 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:05:13 INFO: [load_embeddings] paths shape: (196119,) 2025-08-19 12:05:13 INFO: [load_embeddings] multiplex=False 2025-08-19 12:05:13 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:05:13 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 12:05:13 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:05:17 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:05:18 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:05:20 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:05:20 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-19 12:05:20 INFO: [load_embeddings] labels shape: (141079,) 2025-08-19 12:05:20 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-19 12:05:20 INFO: [load_embeddings] paths shape: (141079,) 2025-08-19 12:05:20 INFO: [load_embeddings] multiplex=False 2025-08-19 12:05:20 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:05:20 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 12:05:20 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:05:24 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:05:26 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-19 12:05:27 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:05:27 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-19 12:05:27 INFO: [load_embeddings] labels shape: (134336,) 2025-08-19 12:05:27 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-19 12:05:27 INFO: [load_embeddings] paths shape: (134336,) 2025-08-19 12:05:28 INFO: [load_embeddings] multiplex=False 2025-08-19 12:05:28 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:05:28 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 12:05:28 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:05:33 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:05:35 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:05:37 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:05:37 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-19 12:05:37 INFO: [load_embeddings] labels shape: (189079,) 2025-08-19 12:05:37 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 12:05:37 INFO: [load_embeddings] paths shape: (189079,) 2025-08-19 12:05:37 INFO: [load_embeddings] multiplex=False 2025-08-19 12:05:37 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:05:37 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 12:05:37 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:05:44 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:05:46 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:05:47 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:05:48 INFO: [load_embeddings] embeddings shape: 
(169304, 192) 2025-08-19 12:05:48 INFO: [load_embeddings] labels shape: (169304,) 2025-08-19 12:05:48 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-19 12:05:48 INFO: [load_embeddings] paths shape: (169304,) 2025-08-19 12:05:49 INFO: [load_embeddings] multiplex=False 2025-08-19 12:05:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 12:05:49 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 12:05:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 12:05:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 12:05:57 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 12:05:58 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 12:05:59 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-08-19 12:05:59 INFO: [load_embeddings] labels shape: (196652,) 2025-08-19 12:05:59 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-19 12:05:59 INFO: [load_embeddings] paths shape: (196652,)
=== Batch 9 ===
Train: (829917, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (196652, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 271639
PEX14_WT_Untreated: 20729
Calreticulin_WT_Untreated: 25889
TDP43_WT_Untreated: 20241
TIA1_WT_Untreated: 17291
TOMM20_WT_Untreated: 20762
HNRNPA1_WT_Untreated: 24642
KIF5A_WT_Untreated: 20758
LSM14A_WT_Untreated: 25087
SON_WT_Untreated: 24021
CLTC_WT_Untreated: 23603
GM130_WT_Untreated: 25137
NEMO_WT_Untreated: 22622
Tubulin_WT_Untreated: 23345
PURA_WT_Untreated: 8972
FMRP_WT_Untreated: 9029
DCP1A_WT_Untreated: 24237
NONO_WT_Untreated: 22064
PML_WT_Untreated: 19983
mitotracker_WT_Untreated: 20963
SQSTM1_WT_Untreated: 19587
ANXA11_WT_Untreated: 21290
Phalloidin_WT_Untreated: 22464
G3BP1_WT_Untreated: 9441
PSD95_WT_Untreated: 20872
FUS_WT_Untreated: 19788
SNCA_WT_Untreated: 10887
NCL_WT_Untreated: 25906
LAMP1_WT_Untreated: 8668
precision recall f1-score support
0 0.99 0.68 0.80 3071
1 0.98 0.92 0.95 6371
2 0.87 0.97 0.92 6644
3 1.00 1.00 1.00 63040
4 0.95 0.98 0.96 6827
5 0.98 0.99 0.98 2353
6 0.84 0.87 0.86 6434
7 0.92 0.99 0.95 2695
8 0.98 0.99 0.99 6365
9 0.85 0.82 0.84 5907
10 0.98 0.97 0.97 5015
11 0.95 0.98 0.96 2610
12 0.98 0.96 0.97 6045
13 1.00 0.98 0.99 6417
14 0.98 1.00 0.99 6775
15 0.86 0.85 0.86 3324
16 1.00 0.95 0.97 4246
17 0.99 1.00 0.99 4849
18 0.95 0.93 0.94 6356
19 0.98 0.88 0.93 2304
20 0.92 0.96 0.94 5279
21 0.75 0.87 0.80 2518
22 1.00 1.00 1.00 3306
23 0.69 0.43 0.53 4078
24 0.88 0.89 0.89 4158
25 0.92 0.99 0.95 3149
26 0.98 0.94 0.96 5271
27 0.69 0.87 0.77 5968
28 0.90 0.89 0.89 5277
accuracy 0.94 196652
macro avg 0.92 0.91 0.92 196652
weighted avg 0.95 0.94 0.94 196652
=== Overall Accuracy ===
0.9325490639810191 [0.9588260188966903, 0.951268438250909, 0.9434775488327776, 0.8660612759745926, 0.931165241222889, 0.9444958607082562]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.997883 0.939535 0.999302 0.970324 0.998531
CLTC_WT_Untreated 0.993837 0.915727 0.996186 0.878364 0.997462
Calreticulin_WT_Untreated 0.993824 0.899825 0.996901 0.904775 0.996722
DAPI_WT_Untreated 0.999661 0.999964 0.999514 0.998997 0.999983
DCP1A_WT_Untreated 0.996571 0.927730 0.998719 0.957633 0.997747
FMRP_WT_Untreated 0.998845 0.962221 0.999255 0.935429 0.999576
FUS_WT_Untreated 0.990330 0.848219 0.994055 0.789031 0.996014
G3BP1_WT_Untreated 0.998591 0.946605 0.999213 0.935048 0.999361
GM130_WT_Untreated 0.990985 0.848232 0.995504 0.856580 0.995197
HNRNPA1_WT_Untreated 0.990137 0.799699 0.995978 0.859122 0.993870
KIF5A_WT_Untreated 0.998070 0.962713 0.998981 0.960514 0.999040
LAMP1_WT_Untreated 0.997765 0.936070 0.998451 0.870322 0.999289
LSM14A_WT_Untreated 0.996061 0.908583 0.998797 0.959368 0.997146
NCL_WT_Untreated 0.993606 0.815364 0.999401 0.977886 0.994030
NEMO_WT_Untreated 0.999026 0.989761 0.999299 0.976540 0.999698
NONO_WT_Untreated 0.995432 0.900110 0.997850 0.913897 0.997468
PEX14_WT_Untreated 0.998721 0.980821 0.999167 0.967075 0.999522
PML_WT_Untreated 0.997045 0.995933 0.997072 0.893978 0.999899
PSD95_WT_Untreated 0.994746 0.924379 0.996663 0.882999 0.997937
PURA_WT_Untreated 0.998321 0.919475 0.999196 0.927039 0.999106
Phalloidin_WT_Untreated 0.995730 0.933821 0.997450 0.910487 0.998161
SNCA_WT_Untreated 0.996782 0.915479 0.997857 0.849685 0.998881
SON_WT_Untreated 0.999166 0.996524 0.999238 0.972815 0.999905
SQSTM1_WT_Untreated 0.983160 0.592267 0.992384 0.647271 0.990398
TDP43_WT_Untreated 0.995215 0.900119 0.997530 0.898719 0.997568
TIA1_WT_Untreated 0.998936 0.979012 0.999341 0.967931 0.999574
TOMM20_WT_Untreated 0.996368 0.943841 0.997734 0.915530 0.998538
Tubulin_WT_Untreated 0.985713 0.789104 0.991492 0.731623 0.993787
mitotracker_WT_Untreated 0.992246 0.814748 0.996902 0.873397 0.995149
Macro Average 0.995268 0.906410 0.997567 0.902841 0.997571
## Baseline: train on batch 1 only, test on each remaining batch
# Logistic-regression (cuML) baseline with training restricted to batch 1.
# The recorded output of this cell reports "Training on Batches: [1]" and then
# tests on batches 2, 3, 7, 8 and 9 in turn.
# NOTE(review): run_baseline_model comes from the star-import of `utils`; the
# per-argument notes below are assumptions to confirm against its definition.
run_baseline_model(
    dataset_config=dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    # balance / norm / choose_features are all switched off for the baseline.
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,  # presumably ignored while choose_features=False -- TODO confirm
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},  # no overrides: the classifier's own defaults apply
    train_specific_batches=[1],
    # presumably the file the per-batch results are written to -- verify
    results_csv='classification_results-indi.csv',
)
Loading all batches...
2025-08-20 18:05:08 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] Init (log path: /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen/logs/200825_180507_949809_64708_galavir_sysdashboardsysjupyter.log; JOBID: 64708 Username: galavir) JOBNAME: sysdashboardsysjupyter 2025-08-20 18:05:08 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] NOVA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA, NOVA_DATA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA/input 2025-08-20 18:05:08 INFO: [load_embeddings] multiplex=False 2025-08-20 18:05:08 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:05:08 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-20 18:05:08 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:05:18 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:05:20 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:05:22 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:05:22 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-20 18:05:22 INFO: [load_embeddings] labels shape: (196119,) 2025-08-20 18:05:22 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-20 18:05:22 INFO: [load_embeddings] paths shape: (196119,) 2025-08-20 18:05:22 INFO: [load_embeddings] multiplex=False 2025-08-20 18:05:22 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:05:22 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-20 18:05:22 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:05:27 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:05:28 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-20 18:05:29 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:05:30 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-20 18:05:30 INFO: [load_embeddings] labels shape: (141079,) 2025-08-20 18:05:30 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-20 18:05:30 INFO: [load_embeddings] paths shape: (141079,) 2025-08-20 18:05:30 INFO: [load_embeddings] multiplex=False 2025-08-20 18:05:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:05:30 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-20 18:05:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:05:36 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:05:37 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:05:38 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:05:38 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-20 18:05:39 INFO: [load_embeddings] labels shape: (134336,) 2025-08-20 18:05:39 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-20 18:05:39 INFO: [load_embeddings] paths shape: (134336,) 2025-08-20 18:05:39 INFO: [load_embeddings] multiplex=False 2025-08-20 18:05:39 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:05:39 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-20 18:05:39 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:05:49 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:05:51 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:05:52 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:05:53 INFO: [load_embeddings] embeddings shape: 
(189079, 192) 2025-08-20 18:05:53 INFO: [load_embeddings] labels shape: (189079,) 2025-08-20 18:05:53 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-20 18:05:53 INFO: [load_embeddings] paths shape: (189079,) 2025-08-20 18:05:53 INFO: [load_embeddings] multiplex=False 2025-08-20 18:05:53 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:05:53 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-20 18:05:53 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:06:02 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:06:04 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:06:05 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:06:06 INFO: [load_embeddings] embeddings shape: (169304, 192) 2025-08-20 18:06:06 INFO: [load_embeddings] labels shape: (169304,) 2025-08-20 18:06:06 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-20 18:06:06 INFO: [load_embeddings] paths shape: (169304,) 2025-08-20 18:06:06 INFO: [load_embeddings] multiplex=False 2025-08-20 18:06:06 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:06:06 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-20 18:06:06 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:06:15 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:06:17 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:06:18 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:06:19 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-08-20 18:06:19 INFO: [load_embeddings] labels shape: (196652,) 2025-08-20 18:06:19 INFO: [load_embeddings] example label: 
TIA1_WT_Untreated 2025-08-20 18:06:19 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2].
=== Batch [2] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (141079, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.96 0.99 0.98 3787
1 0.92 0.94 0.93 4311
2 0.89 0.87 0.88 3390
3 0.99 1.00 1.00 45031
4 0.97 0.93 0.95 4057
5 0.87 0.98 0.92 938
6 0.98 0.82 0.89 3613
7 0.81 0.97 0.88 953
8 0.96 0.97 0.97 3228
9 0.91 0.99 0.95 4216
10 0.94 0.96 0.95 3856
11 0.90 0.86 0.88 1375
12 0.95 0.96 0.95 4171
13 0.99 0.98 0.99 3624
14 0.99 0.99 0.99 3952
15 0.95 0.89 0.92 4542
16 0.98 0.99 0.99 3866
17 0.99 1.00 0.99 3757
18 0.94 0.68 0.79 4292
19 0.84 0.92 0.88 786
20 0.91 0.88 0.89 3857
21 0.96 0.94 0.95 2469
22 0.99 1.00 1.00 4551
23 0.55 0.78 0.64 3546
24 0.88 0.94 0.91 3696
25 0.99 0.98 0.98 3615
26 0.86 0.93 0.89 3701
27 0.90 0.42 0.57 3897
28 0.73 0.94 0.83 4002
accuracy 0.94 141079
macro avg 0.91 0.91 0.91 141079
weighted avg 0.94 0.94 0.93 141079
Training on Batches: [1], Testing on: [3].
=== Batch [3] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (134336, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.95 0.99 0.97 3867
1 0.83 0.91 0.87 3424
2 0.97 0.85 0.91 3987
3 1.00 1.00 1.00 43178
4 0.91 0.88 0.90 3476
5 0.87 0.96 0.91 1754
6 0.99 0.95 0.97 3703
7 0.80 0.91 0.85 1846
8 0.97 0.91 0.94 3826
9 0.98 0.99 0.99 3833
10 0.94 0.92 0.93 2963
11 0.84 0.94 0.89 918
12 0.94 0.96 0.95 3797
13 0.99 0.99 0.99 3696
14 0.94 0.99 0.97 3444
15 0.97 0.86 0.91 3657
16 0.97 0.99 0.98 3947
17 0.99 0.99 0.99 3004
18 0.91 0.87 0.89 3514
19 0.87 0.91 0.89 1557
20 0.74 0.85 0.79 3575
21 0.93 0.94 0.94 2414
22 0.99 1.00 0.99 3654
23 0.55 0.60 0.57 3160
24 0.89 0.97 0.93 3571
25 0.97 0.99 0.98 3548
26 0.69 0.95 0.80 3831
27 0.80 0.46 0.58 3203
28 0.78 0.55 0.64 3989
accuracy 0.93 134336
macro avg 0.90 0.90 0.89 134336
weighted avg 0.93 0.93 0.93 134336
Training on Batches: [1], Testing on: [7].
=== Batch [7] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (189079, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.98 0.83 0.90 4789
1 0.68 0.89 0.77 6170
2 0.57 0.85 0.68 5999
3 0.99 1.00 1.00 64486
4 0.92 0.77 0.84 5786
5 0.92 0.92 0.92 1726
6 0.00 0.00 0.00 37
7 0.76 0.96 0.85 1788
8 0.33 0.21 0.26 6070
9 0.94 0.01 0.03 5062
10 0.85 0.96 0.90 5493
11 0.28 0.81 0.41 2097
12 0.88 0.36 0.51 5627
13 0.47 0.01 0.01 6165
14 0.88 0.98 0.93 4370
15 0.89 0.71 0.79 4624
16 0.94 0.94 0.94 4011
17 0.45 1.00 0.62 4952
18 0.86 0.95 0.90 5064
19 0.75 0.91 0.82 2163
20 0.89 0.95 0.92 5532
21 0.86 0.64 0.74 2126
22 0.99 0.98 0.99 5574
23 0.58 0.68 0.63 5085
24 0.73 0.78 0.75 4525
25 0.82 0.90 0.86 3239
26 0.98 0.82 0.89 5021
27 0.75 0.72 0.73 6590
28 0.78 0.19 0.31 4908
accuracy 0.80 189079
macro avg 0.75 0.71 0.69 189079
weighted avg 0.84 0.80 0.79 189079
Training on Batches: [1], Testing on: [8].
=== Batch [8] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (169304, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.97 0.93 0.95 4070
1 0.90 0.64 0.75 3949
2 0.85 0.97 0.90 5874
3 0.99 1.00 0.99 55763
4 0.95 0.96 0.95 5741
5 0.80 0.96 0.87 1933
6 0.93 0.77 0.84 5849
7 0.78 0.96 0.86 2165
8 0.95 0.61 0.74 5637
9 0.85 0.95 0.90 5508
10 0.80 0.97 0.87 3647
11 0.93 0.72 0.82 2717
12 0.90 0.94 0.92 5484
13 0.90 0.97 0.93 5848
14 0.97 1.00 0.98 5741
15 0.88 0.75 0.81 4699
16 0.80 0.95 0.87 4075
17 0.90 0.99 0.94 3375
18 0.76 0.91 0.83 2263
19 0.75 0.92 0.83 1816
20 0.83 0.93 0.88 4069
21 0.88 0.90 0.89 1510
22 0.97 0.99 0.98 5255
23 0.54 0.67 0.60 3269
24 0.74 0.86 0.80 3766
25 0.96 0.92 0.94 2990
26 0.86 0.84 0.85 3809
27 0.78 0.58 0.67 4782
28 0.91 0.54 0.68 3700
accuracy 0.91 169304
macro avg 0.86 0.86 0.86 169304
weighted avg 0.91 0.91 0.90 169304
Training on Batches: [1], Testing on: [9].
=== Batch [9] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (196652, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.96 0.38 0.54 3071
1 0.93 0.92 0.92 6371
2 0.62 0.98 0.76 6644
3 1.00 1.00 1.00 63040
4 0.91 0.96 0.93 6827
5 0.97 0.98 0.97 2353
6 0.85 0.99 0.92 6434
7 0.85 0.98 0.91 2695
8 0.94 0.99 0.97 6365
9 0.99 0.80 0.88 5907
10 0.90 0.97 0.93 5015
11 0.87 0.76 0.81 2610
12 0.93 0.93 0.93 6045
13 0.99 0.97 0.98 6417
14 0.92 1.00 0.96 6775
15 0.68 0.62 0.65 3324
16 0.98 0.89 0.93 4246
17 0.97 0.99 0.98 4849
18 0.91 0.89 0.90 6356
19 0.86 0.82 0.84 2304
20 0.81 0.96 0.88 5279
21 0.62 0.63 0.63 2518
22 0.99 0.99 0.99 3306
23 0.61 0.52 0.56 4078
24 0.72 0.75 0.73 4158
25 0.77 0.98 0.86 3149
26 0.98 0.81 0.89 5271
27 0.72 0.75 0.74 5968
28 0.79 0.29 0.42 5277
accuracy 0.90 196652
macro avg 0.86 0.84 0.84 196652
weighted avg 0.91 0.90 0.90 196652
=== Overall Accuracy ===
0.8952082124805276 [0.9362555731186073, 0.9276441162458313, 0.8037010984826448, 0.9056017577848131, 0.9028385167707422]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.995581 0.841810 0.999295 0.966467 0.996191
CLTC_WT_Untreated 0.990983 0.868029 0.994678 0.830523 0.996029
Calreticulin_WT_Untreated 0.986117 0.911331 0.988524 0.718772 0.997121
DAPI_WT_Untreated 0.997806 0.999989 0.996746 0.993345 0.999995
DCP1A_WT_Untreated 0.994769 0.901456 0.997771 0.928648 0.996832
FMRP_WT_Untreated 0.998248 0.957835 0.998676 0.884562 0.999553
FUS_WT_Untreated 0.989688 0.883938 0.992249 0.734160 0.997175
G3BP1_WT_Untreated 0.996822 0.954165 0.997313 0.803387 0.999471
GM130_WT_Untreated 0.986956 0.703415 0.995803 0.839460 0.990793
HNRNPA1_WT_Untreated 0.990408 0.733670 0.998221 0.926189 0.991946
KIF5A_WT_Untreated 0.995517 0.957137 0.996511 0.876676 0.998887
LAMP1_WT_Untreated 0.991375 0.793352 0.993719 0.599269 0.997544
LSM14A_WT_Untreated 0.992354 0.814560 0.997900 0.923678 0.994236
NCL_WT_Untreated 0.991095 0.741709 0.999075 0.962506 0.991795
NEMO_WT_Untreated 0.997861 0.990693 0.998077 0.939467 0.999719
NONO_WT_Untreated 0.991576 0.768013 0.997332 0.881123 0.994046
PEX14_WT_Untreated 0.997045 0.949417 0.998229 0.930208 0.998742
PML_WT_Untreated 0.991768 0.993479 0.991726 0.747067 0.999838
PSD95_WT_Untreated 0.993511 0.863232 0.996971 0.883333 0.996369
PURA_WT_Untreated 0.996604 0.888824 0.997736 0.804681 0.998832
Phalloidin_WT_Untreated 0.992933 0.919236 0.994967 0.834520 0.997764
SNCA_WT_Untreated 0.995421 0.806469 0.997966 0.842260 0.997395
SON_WT_Untreated 0.999396 0.991047 0.999626 0.986543 0.999752
SQSTM1_WT_Untreated 0.980460 0.647403 0.988316 0.566555 0.991655
TDP43_WT_Untreated 0.990992 0.853266 0.994341 0.785718 0.996424
TIA1_WT_Untreated 0.996888 0.954900 0.997742 0.895764 0.999082
TOMM20_WT_Untreated 0.992909 0.861739 0.996417 0.865460 0.996302
Tubulin_WT_Untreated 0.983191 0.616326 0.994315 0.766760 0.988435
mitotracker_WT_Untreated 0.982767 0.474721 0.996512 0.786445 0.985939
Macro Average 0.992450 0.849695 0.996095 0.844950 0.996133
{'Accuracy': 0.9924496274350632,
'Sensitivity': 0.8496952755838646,
'Specificity': 0.9960950122368418,
'PPV': 0.8449497977273821,
'NPV': 0.9961332704646154}
# Extra scikit-learn baselines to compare against the logistic-regression run.
# Each entry is (estimator class, constructor kwargs); both are passed
# unchanged to run_baseline_model as classifier_class / classifier_kwargs.
additional_classifiers = [
    (GaussianNB, {}),
    (RidgeClassifier, {}),
    (LinearSVC, {"C": 1.0, "max_iter": 1000, "random_state": 42}),
    # n_jobs=-1 fits/predicts the 300 trees on all available cores, matching
    # the ExtraTrees entry below. n_jobs only changes how the per-tree work is
    # scheduled; random_state still pins the forest that gets built.
    (RandomForestClassifier, {
        "n_estimators": 300,
        "n_jobs": -1,
        "random_state": 42,
    }),
    (ExtraTreesClassifier, {
        "max_depth": None,
        "min_samples_leaf": 1,
        "n_estimators": 300,
        "n_jobs": -1,
        "random_state": 42,
    }),
]

# Run every classifier through the same pipeline: batches 1, 2, 3, 7, 8, 9 are
# requested and train_specific_batches=[1] restricts training to batch 1.
# All runs are given the same results_csv path.
# NOTE(review): whether run_baseline_model appends to or overwrites that CSV is
# not visible here -- confirm before relying on it holding all classifiers.
for clf_class, clf_kwargs in additional_classifiers:
    print(f"\n=== Running {clf_class.__name__} ===")
    run_baseline_model(
        dataset_config=dataset_config,
        batches=[1, 2, 3, 7, 8, 9],
        classifier_class=clf_class,
        classifier_kwargs=clf_kwargs,
        train_specific_batches=[1],
        results_csv="classification_results-indi.csv",
    )
2025-08-20 18:42:40 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] Init (log path: /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen/logs/200825_184240_000444_71466_galavir_sysdashboardsysjupyter.log; JOBID: 71466 Username: galavir) JOBNAME: sysdashboardsysjupyter 2025-08-20 18:42:40 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] NOVA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA, NOVA_DATA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA/input 2025-08-20 18:42:40 INFO: [load_embeddings] multiplex=False 2025-08-20 18:42:40 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:42:40 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-20 18:42:40 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
=== Running RandomForestClassifier === Loading all batches...
2025-08-20 18:42:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:42:47 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:42:48 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:42:48 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-20 18:42:48 INFO: [load_embeddings] labels shape: (196119,) 2025-08-20 18:42:48 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-20 18:42:48 INFO: [load_embeddings] paths shape: (196119,) 2025-08-20 18:42:48 INFO: [load_embeddings] multiplex=False 2025-08-20 18:42:48 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:42:48 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-20 18:42:48 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:42:52 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:42:53 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:42:54 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:42:55 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-20 18:42:55 INFO: [load_embeddings] labels shape: (141079,) 2025-08-20 18:42:55 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-20 18:42:55 INFO: [load_embeddings] paths shape: (141079,) 2025-08-20 18:42:55 INFO: [load_embeddings] multiplex=False 2025-08-20 18:42:55 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:42:55 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-20 18:42:55 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:43:00 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:43:01 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-20 18:43:02 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:43:03 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-20 18:43:03 INFO: [load_embeddings] labels shape: (134336,) 2025-08-20 18:43:03 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-20 18:43:03 INFO: [load_embeddings] paths shape: (134336,) 2025-08-20 18:43:03 INFO: [load_embeddings] multiplex=False 2025-08-20 18:43:03 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:43:03 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-20 18:43:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:43:08 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:43:10 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:43:12 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:43:12 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-20 18:43:12 INFO: [load_embeddings] labels shape: (189079,) 2025-08-20 18:43:12 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-20 18:43:12 INFO: [load_embeddings] paths shape: (189079,) 2025-08-20 18:43:12 INFO: [load_embeddings] multiplex=False 2025-08-20 18:43:12 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:43:12 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-20 18:43:13 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:43:19 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:43:21 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:43:23 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:43:24 INFO: [load_embeddings] embeddings shape: 
(169304, 192) 2025-08-20 18:43:24 INFO: [load_embeddings] labels shape: (169304,) 2025-08-20 18:43:24 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-20 18:43:24 INFO: [load_embeddings] paths shape: (169304,) 2025-08-20 18:43:24 INFO: [load_embeddings] multiplex=False 2025-08-20 18:43:24 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 18:43:24 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-20 18:43:24 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 18:43:30 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 18:43:32 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 18:43:34 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 18:43:34 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-08-20 18:43:34 INFO: [load_embeddings] labels shape: (196652,) 2025-08-20 18:43:34 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-20 18:43:34 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2].
=== Batch [2] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (141079, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.94 0.99 0.97 3787
1 0.87 0.90 0.88 4311
2 0.88 0.82 0.85 3390
3 1.00 1.00 1.00 45031
4 0.96 0.93 0.94 4057
5 0.85 0.98 0.91 938
6 0.98 0.85 0.91 3613
7 0.80 0.95 0.86 953
8 0.93 0.96 0.95 3228
9 0.90 0.99 0.94 4216
10 0.95 0.94 0.95 3856
11 0.88 0.75 0.81 1375
12 0.93 0.97 0.95 4171
13 1.00 0.97 0.98 3624
14 0.99 1.00 0.99 3952
15 0.93 0.86 0.90 4542
16 0.98 0.99 0.98 3866
17 0.99 1.00 1.00 3757
18 0.91 0.57 0.70 4292
19 0.84 0.91 0.88 786
20 0.91 0.85 0.87 3857
21 0.96 0.93 0.94 2469
22 0.99 1.00 1.00 4551
23 0.54 0.73 0.62 3546
24 0.85 0.92 0.88 3696
25 0.99 0.98 0.98 3615
26 0.81 0.92 0.86 3701
27 0.89 0.43 0.58 3897
28 0.65 0.93 0.76 4002
accuracy 0.93 141079
macro avg 0.90 0.90 0.89 141079
weighted avg 0.93 0.93 0.92 141079
Training on Batches: [1], Testing on: [3].
=== Batch [3] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (134336, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.93 0.99 0.96 3867
1 0.72 0.83 0.77 3424
2 0.97 0.80 0.88 3987
3 1.00 1.00 1.00 43178
4 0.84 0.87 0.85 3476
5 0.82 0.96 0.88 1754
6 0.99 0.97 0.98 3703
7 0.72 0.86 0.78 1846
8 0.97 0.83 0.89 3826
9 0.98 0.99 0.99 3833
10 0.94 0.91 0.93 2963
11 0.84 0.90 0.87 918
12 0.93 0.96 0.94 3797
13 1.00 0.99 0.99 3696
14 0.94 0.99 0.96 3444
15 0.97 0.85 0.91 3657
16 0.95 0.99 0.97 3947
17 0.99 1.00 0.99 3004
18 0.89 0.85 0.87 3514
19 0.83 0.91 0.87 1557
20 0.72 0.80 0.76 3575
21 0.93 0.94 0.94 2414
22 0.99 1.00 0.99 3654
23 0.51 0.55 0.53 3160
24 0.89 0.97 0.92 3571
25 0.98 0.99 0.98 3548
26 0.71 0.94 0.81 3831
27 0.79 0.40 0.53 3203
28 0.68 0.49 0.57 3989
accuracy 0.92 134336
macro avg 0.88 0.88 0.87 134336
weighted avg 0.92 0.92 0.91 134336
Training on Batches: [1], Testing on: [7].
=== Batch [7] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (189079, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.94 0.81 0.87 4789
1 0.82 0.84 0.83 6170
2 0.59 0.89 0.71 5999
3 0.99 1.00 1.00 64486
4 0.76 0.76 0.76 5786
5 0.94 0.91 0.93 1726
6 0.00 0.00 0.00 37
7 0.77 0.95 0.85 1788
8 0.41 0.40 0.40 6070
9 0.94 0.01 0.01 5062
10 0.83 0.96 0.89 5493
11 0.20 0.53 0.30 2097
12 0.62 0.12 0.21 5627
13 0.40 0.00 0.00 6165
14 0.80 0.99 0.89 4370
15 0.88 0.75 0.81 4624
16 0.94 0.93 0.94 4011
17 0.45 1.00 0.62 4952
18 0.81 0.93 0.87 5064
19 0.69 0.92 0.79 2163
20 0.85 0.93 0.89 5532
21 0.78 0.68 0.72 2126
22 1.00 0.99 1.00 5574
23 0.59 0.62 0.61 5085
24 0.74 0.69 0.71 4525
25 0.83 0.93 0.88 3239
26 0.98 0.73 0.84 5021
27 0.72 0.70 0.71 6590
28 0.68 0.18 0.28 4908
accuracy 0.79 189079
macro avg 0.72 0.70 0.67 189079
weighted avg 0.82 0.79 0.78 189079
Training on Batches: [1], Testing on: [8].
=== Batch [8] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (169304, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.97 0.94 0.95 4070
1 0.83 0.52 0.64 3949
2 0.85 0.96 0.90 5874
3 0.99 1.00 1.00 55763
4 0.91 0.95 0.93 5741
5 0.79 0.95 0.86 1933
6 0.94 0.79 0.86 5849
7 0.75 0.94 0.84 2165
8 0.85 0.53 0.66 5637
9 0.82 0.96 0.88 5508
10 0.80 0.97 0.88 3647
11 0.90 0.45 0.60 2717
12 0.81 0.95 0.88 5484
13 0.99 0.94 0.96 5848
14 0.96 0.99 0.98 5741
15 0.87 0.71 0.78 4699
16 0.71 0.96 0.82 4075
17 0.88 1.00 0.93 3375
18 0.68 0.89 0.77 2263
19 0.74 0.93 0.82 1816
20 0.82 0.90 0.86 4069
21 0.83 0.91 0.87 1510
22 0.98 1.00 0.99 5255
23 0.52 0.63 0.57 3269
24 0.71 0.85 0.77 3766
25 0.96 0.90 0.93 2990
26 0.86 0.77 0.81 3809
27 0.77 0.55 0.64 4782
28 0.81 0.54 0.65 3700
accuracy 0.89 169304
macro avg 0.84 0.84 0.83 169304
weighted avg 0.90 0.89 0.89 169304
Training on Batches: [1], Testing on: [9].
=== Batch [9] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (196652, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.87 0.39 0.54 3071
1 0.91 0.88 0.90 6371
2 0.63 0.98 0.77 6644
3 1.00 1.00 1.00 63040
4 0.88 0.96 0.91 6827
5 0.95 0.98 0.96 2353
6 0.83 0.99 0.91 6434
7 0.86 0.95 0.90 2695
8 0.87 0.99 0.93 6365
9 0.98 0.79 0.88 5907
10 0.87 0.97 0.91 5015
11 0.79 0.45 0.57 2610
12 0.90 0.91 0.90 6045
13 1.00 0.96 0.98 6417
14 0.91 0.99 0.95 6775
15 0.58 0.62 0.60 3324
16 0.99 0.89 0.93 4246
17 0.98 0.99 0.99 4849
18 0.88 0.90 0.89 6356
19 0.83 0.83 0.83 2304
20 0.79 0.94 0.86 5279
21 0.63 0.65 0.64 2518
22 0.99 1.00 0.99 3306
23 0.59 0.50 0.54 4078
24 0.65 0.56 0.60 4158
25 0.77 0.99 0.86 3149
26 0.98 0.72 0.83 5271
27 0.71 0.74 0.72 5968
28 0.76 0.27 0.40 5277
accuracy 0.89 196652
macro avg 0.84 0.82 0.82 196652
weighted avg 0.89 0.89 0.88 196652
=== Overall Accuracy ===
0.8826791073517963 [0.925481467830081, 0.915004168651739, 0.7934091041310775, 0.8904633086046402, 0.8890374875414437]
2025-08-20 21:19:39 INFO: [load_embeddings] multiplex=False 2025-08-20 21:19:39 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 21:19:39 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-20 21:19:39 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.995018 0.841554 0.998725 0.940965 0.996183
CLTC_WT_Untreated 0.989851 0.809618 0.995267 0.837125 0.994285
Calreticulin_WT_Untreated 0.986229 0.905654 0.988822 0.722815 0.996939
DAPI_WT_Untreated 0.998679 0.999996 0.998039 0.995979 0.999998
DCP1A_WT_Untreated 0.992355 0.896125 0.995451 0.863728 0.996654
FMRP_WT_Untreated 0.998017 0.953010 0.998493 0.870135 0.999502
FUS_WT_Untreated 0.989910 0.902271 0.992033 0.732804 0.997620
G3BP1_WT_Untreated 0.996247 0.931195 0.996995 0.780984 0.999207
GM130_WT_Untreated 0.985003 0.717942 0.993336 0.770700 0.991219
HNRNPA1_WT_Untreated 0.989955 0.732447 0.997791 0.909846 0.991906
KIF5A_WT_Untreated 0.995130 0.953323 0.996214 0.867086 0.998787
LAMP1_WT_Untreated 0.988650 0.549861 0.993845 0.513997 0.994666
LSM14A_WT_Untreated 0.989303 0.760707 0.996435 0.869399 0.992564
NCL_WT_Untreated 0.991507 0.729748 0.999883 0.995023 0.991425
NEMO_WT_Untreated 0.997105 0.993246 0.997221 0.915016 0.999796
NONO_WT_Untreated 0.990597 0.763024 0.996456 0.847190 0.993914
PEX14_WT_Untreated 0.996189 0.950558 0.997323 0.898255 0.998769
PML_WT_Untreated 0.991894 0.996439 0.991782 0.748897 0.999912
PSD95_WT_Untreated 0.991537 0.832984 0.995749 0.838840 0.995564
PURA_WT_Untreated 0.996124 0.893809 0.997198 0.769999 0.998884
Phalloidin_WT_Untreated 0.991654 0.893510 0.994364 0.814013 0.997052
SNCA_WT_Untreated 0.995250 0.815348 0.997673 0.825142 0.997513
SON_WT_Untreated 0.999652 0.995971 0.999754 0.991135 0.999889
SQSTM1_WT_Untreated 0.979542 0.606960 0.988331 0.550965 0.990706
TDP43_WT_Untreated 0.989274 0.786620 0.994203 0.767430 0.994808
TIA1_WT_Untreated 0.997009 0.957741 0.997807 0.898735 0.999140
TOMM20_WT_Untreated 0.991481 0.806222 0.996436 0.858148 0.994825
Tubulin_WT_Untreated 0.982305 0.597668 0.993968 0.750270 0.987875
mitotracker_WT_Untreated 0.980427 0.454791 0.994649 0.696904 0.985387
Macro Average 0.991583 0.828564 0.995663 0.822122 0.995689
=== Running ExtraTreesClassifier ===
Loading all batches...
2025-08-20 21:19:44 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 21:19:46 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 21:19:47 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 21:19:48 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-20 21:19:48 INFO: [load_embeddings] labels shape: (196119,) 2025-08-20 21:19:48 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-20 21:19:48 INFO: [load_embeddings] paths shape: (196119,) 2025-08-20 21:19:48 INFO: [load_embeddings] multiplex=False 2025-08-20 21:19:48 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 21:19:48 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-20 21:19:48 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 21:19:51 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 21:19:53 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 21:19:53 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 21:19:54 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-20 21:19:54 INFO: [load_embeddings] labels shape: (141079,) 2025-08-20 21:19:54 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-20 21:19:54 INFO: [load_embeddings] paths shape: (141079,) 2025-08-20 21:19:54 INFO: [load_embeddings] multiplex=False 2025-08-20 21:19:54 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 21:19:54 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-20 21:19:54 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 21:19:57 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 21:19:59 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-20 21:20:00 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 21:20:00 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-20 21:20:00 INFO: [load_embeddings] labels shape: (134336,) 2025-08-20 21:20:00 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-20 21:20:00 INFO: [load_embeddings] paths shape: (134336,) 2025-08-20 21:20:00 INFO: [load_embeddings] multiplex=False 2025-08-20 21:20:00 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 21:20:00 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-20 21:20:00 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 21:20:04 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 21:20:06 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 21:20:08 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 21:20:08 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-20 21:20:08 INFO: [load_embeddings] labels shape: (189079,) 2025-08-20 21:20:08 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-20 21:20:08 INFO: [load_embeddings] paths shape: (189079,) 2025-08-20 21:20:08 INFO: [load_embeddings] multiplex=False 2025-08-20 21:20:08 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 21:20:08 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-20 21:20:08 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 21:20:13 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 21:20:15 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 21:20:17 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 21:20:17 INFO: [load_embeddings] embeddings shape: 
(169304, 192) 2025-08-20 21:20:17 INFO: [load_embeddings] labels shape: (169304,) 2025-08-20 21:20:17 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-20 21:20:17 INFO: [load_embeddings] paths shape: (169304,) 2025-08-20 21:20:17 INFO: [load_embeddings] multiplex=False 2025-08-20 21:20:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 21:20:17 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-20 21:20:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-20 21:20:22 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 21:20:24 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 21:20:25 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 21:20:26 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-08-20 21:20:26 INFO: [load_embeddings] labels shape: (196652,) 2025-08-20 21:20:26 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-20 21:20:26 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2].
=== Batch [2] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (141079, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.94 0.99 0.97 3787
1 0.88 0.89 0.89 4311
2 0.86 0.83 0.85 3390
3 1.00 1.00 1.00 45031
4 0.96 0.92 0.94 4057
5 0.84 0.97 0.90 938
6 0.98 0.85 0.91 3613
7 0.79 0.95 0.86 953
8 0.91 0.97 0.94 3228
9 0.91 0.99 0.95 4216
10 0.96 0.95 0.95 3856
11 0.88 0.71 0.78 1375
12 0.93 0.96 0.95 4171
13 1.00 0.97 0.98 3624
14 0.99 0.99 0.99 3952
15 0.94 0.86 0.90 4542
16 0.98 0.99 0.99 3866
17 0.99 1.00 0.99 3757
18 0.90 0.57 0.69 4292
19 0.86 0.90 0.88 786
20 0.91 0.84 0.87 3857
21 0.96 0.93 0.95 2469
22 0.99 1.00 1.00 4551
23 0.54 0.73 0.62 3546
24 0.85 0.93 0.89 3696
25 0.99 0.98 0.98 3615
26 0.81 0.92 0.86 3701
27 0.89 0.43 0.58 3897
28 0.64 0.93 0.76 4002
accuracy 0.93 141079
macro avg 0.90 0.89 0.89 141079
weighted avg 0.93 0.93 0.92 141079
Training on Batches: [1], Testing on: [3].
=== Batch [3] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (134336, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.94 0.99 0.96 3867
1 0.75 0.83 0.79 3424
2 0.96 0.81 0.88 3987
3 1.00 1.00 1.00 43178
4 0.87 0.87 0.87 3476
5 0.83 0.95 0.88 1754
6 0.99 0.97 0.98 3703
7 0.71 0.87 0.79 1846
8 0.97 0.87 0.91 3826
9 0.98 0.99 0.99 3833
10 0.95 0.92 0.93 2963
11 0.86 0.89 0.87 918
12 0.93 0.96 0.95 3797
13 1.00 0.98 0.99 3696
14 0.94 0.99 0.96 3444
15 0.97 0.85 0.91 3657
16 0.94 0.99 0.97 3947
17 0.99 1.00 0.99 3004
18 0.88 0.84 0.86 3514
19 0.85 0.90 0.87 1557
20 0.73 0.81 0.76 3575
21 0.93 0.94 0.94 2414
22 0.99 1.00 0.99 3654
23 0.51 0.55 0.53 3160
24 0.88 0.97 0.92 3571
25 0.98 0.99 0.98 3548
26 0.70 0.94 0.80 3831
27 0.79 0.40 0.53 3203
28 0.67 0.51 0.58 3989
accuracy 0.92 134336
macro avg 0.88 0.88 0.88 134336
weighted avg 0.92 0.92 0.91 134336
Training on Batches: [1], Testing on: [7].
=== Batch [7] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (189079, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.95 0.81 0.88 4789
1 0.80 0.83 0.81 6170
2 0.58 0.89 0.70 5999
3 0.99 1.00 1.00 64486
4 0.75 0.76 0.76 5786
5 0.94 0.91 0.92 1726
6 0.00 0.00 0.00 37
7 0.77 0.94 0.84 1788
8 0.38 0.38 0.38 6070
9 0.92 0.00 0.01 5062
10 0.84 0.96 0.90 5493
11 0.18 0.46 0.26 2097
12 0.52 0.09 0.15 5627
13 0.00 0.00 0.00 6165
14 0.80 1.00 0.89 4370
15 0.87 0.74 0.80 4624
16 0.94 0.94 0.94 4011
17 0.45 1.00 0.62 4952
18 0.81 0.92 0.86 5064
19 0.70 0.92 0.80 2163
20 0.83 0.94 0.88 5532
21 0.80 0.65 0.72 2126
22 1.00 0.99 1.00 5574
23 0.59 0.60 0.60 5085
24 0.73 0.69 0.71 4525
25 0.82 0.93 0.88 3239
26 0.98 0.73 0.84 5021
27 0.72 0.72 0.72 6590
28 0.68 0.17 0.28 4908
accuracy 0.79 189079
macro avg 0.70 0.69 0.66 189079
weighted avg 0.80 0.79 0.77 189079
Training on Batches: [1], Testing on: [8].
=== Batch [8] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (169304, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.97 0.94 0.95 4070
1 0.84 0.50 0.63 3949
2 0.84 0.97 0.90 5874
3 0.99 1.00 1.00 55763
4 0.92 0.95 0.94 5741
5 0.81 0.95 0.87 1933
6 0.93 0.79 0.85 5849
7 0.76 0.94 0.84 2165
8 0.81 0.55 0.65 5637
9 0.83 0.95 0.89 5508
10 0.83 0.97 0.89 3647
11 0.90 0.40 0.55 2717
12 0.83 0.95 0.89 5484
13 1.00 0.93 0.96 5848
14 0.96 1.00 0.98 5741
15 0.86 0.71 0.78 4699
16 0.69 0.96 0.81 4075
17 0.87 1.00 0.93 3375
18 0.67 0.89 0.76 2263
19 0.78 0.93 0.85 1816
20 0.81 0.90 0.85 4069
21 0.86 0.90 0.88 1510
22 0.99 1.00 0.99 5255
23 0.53 0.61 0.57 3269
24 0.71 0.84 0.77 3766
25 0.96 0.92 0.94 2990
26 0.85 0.76 0.80 3809
27 0.77 0.58 0.66 4782
28 0.80 0.55 0.66 3700
accuracy 0.89 169304
macro avg 0.84 0.84 0.83 169304
weighted avg 0.89 0.89 0.89 169304
Training on Batches: [1], Testing on: [9].
=== Batch [9] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (196652, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.88 0.38 0.53 3071
1 0.92 0.88 0.90 6371
2 0.63 0.98 0.76 6644
3 1.00 1.00 1.00 63040
4 0.88 0.95 0.91 6827
5 0.95 0.97 0.96 2353
6 0.82 0.99 0.90 6434
7 0.86 0.95 0.91 2695
8 0.85 0.99 0.92 6365
9 0.99 0.77 0.87 5907
10 0.87 0.97 0.92 5015
11 0.78 0.39 0.52 2610
12 0.90 0.91 0.90 6045
13 1.00 0.95 0.97 6417
14 0.90 0.99 0.95 6775
15 0.58 0.63 0.60 3324
16 0.99 0.89 0.94 4246
17 0.98 0.99 0.99 4849
18 0.88 0.90 0.89 6356
19 0.85 0.83 0.84 2304
20 0.77 0.95 0.85 5279
21 0.64 0.62 0.63 2518
22 0.99 1.00 0.99 3306
23 0.60 0.48 0.53 4078
24 0.65 0.55 0.60 4158
25 0.75 0.99 0.85 3149
26 0.98 0.72 0.83 5271
27 0.71 0.75 0.73 5968
28 0.76 0.27 0.39 5277
accuracy 0.89 196652
macro avg 0.84 0.82 0.81 196652
weighted avg 0.89 0.89 0.88 196652
=== Overall Accuracy ===
0.8817536486928201 [0.9252546445608489, 0.9163292043830396, 0.7899766764156781, 0.8903924301847564, 0.886815287919777]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.995087 0.840584 0.998819 0.945006 0.996160
CLTC_WT_Untreated 0.989785 0.802353 0.995417 0.840265 0.994069
Calreticulin_WT_Untreated 0.985946 0.909786 0.988397 0.716201 0.997071
DAPI_WT_Untreated 0.998273 1.000000 0.997434 0.994746 1.000000
DCP1A_WT_Untreated 0.992550 0.893151 0.995748 0.871110 0.996559
FMRP_WT_Untreated 0.998071 0.950827 0.998571 0.875767 0.999479
FUS_WT_Untreated 0.989808 0.898706 0.992014 0.731573 0.997533
G3BP1_WT_Untreated 0.996291 0.931830 0.997033 0.783255 0.999214
GM130_WT_Untreated 0.984260 0.723991 0.992381 0.747770 0.991397
HNRNPA1_WT_Untreated 0.989966 0.726902 0.997971 0.915994 0.991741
KIF5A_WT_Untreated 0.995565 0.955469 0.996604 0.879372 0.998844
LAMP1_WT_Untreated 0.988096 0.501081 0.993862 0.491471 0.994092
LSM14A_WT_Untreated 0.989217 0.752348 0.996606 0.873677 0.992307
NCL_WT_Untreated 0.991448 0.726252 0.999934 0.997174 0.991316
NEMO_WT_Untreated 0.996985 0.994440 0.997061 0.910658 0.999832
NONO_WT_Untreated 0.990468 0.761777 0.996356 0.843335 0.993881
PEX14_WT_Untreated 0.996047 0.952991 0.997117 0.891520 0.998829
PML_WT_Untreated 0.991754 0.996439 0.991639 0.745637 0.999912
PSD95_WT_Untreated 0.991300 0.828610 0.995622 0.834083 0.995448
PURA_WT_Untreated 0.996436 0.892186 0.997530 0.791281 0.998867
Phalloidin_WT_Untreated 0.991320 0.897185 0.993919 0.802904 0.997152
SNCA_WT_Untreated 0.995324 0.802845 0.997917 0.838475 0.997346
SON_WT_Untreated 0.999706 0.995927 0.999811 0.993170 0.999887
SQSTM1_WT_Untreated 0.979687 0.593845 0.988789 0.555447 0.990404
TDP43_WT_Untreated 0.989125 0.782207 0.994157 0.765018 0.994701
TIA1_WT_Untreated 0.996929 0.962094 0.997637 0.892190 0.999228
TOMM20_WT_Untreated 0.991265 0.802616 0.996311 0.853344 0.994729
Tubulin_WT_Untreated 0.982471 0.612684 0.993684 0.746275 0.988319
mitotracker_WT_Untreated 0.980421 0.459270 0.994521 0.693997 0.985503
Macro Average 0.991503 0.825807 0.995616 0.821404 0.995649
# Within-batch reference point: fit and score the classifier on a held-out
# split of batch 1 only, so train and test come from the same batch.
# NOTE(review): the recorded output shows 156895 train / 39224 test rows
# (about 80/20) -- confirm the split ratio and seeding in utils.
run_train_test_split_baseline(
    dataset_config,
    batches=[1],
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},  # library defaults
)
2025-08-19 14:18:26 INFO: [load_embeddings] multiplex=False 2025-08-19 14:18:26 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:18:26 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 14:18:26 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 14:18:31 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:18:33 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:18:35 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:18:35 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-19 14:18:35 INFO: [load_embeddings] labels shape: (196119,) 2025-08-19 14:18:35 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 14:18:35 INFO: [load_embeddings] paths shape: (196119,)
Train dataset
(156895,) (156895, 192) [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
3: 50545
23: 3622
8: 5101
22: 3990
27: 3898
15: 3634
16: 3864
25: 3119
26: 3520
21: 1895
12: 4806
24: 3746
20: 4345
2: 5311
11: 1249
4: 4142
17: 3916
14: 4092
1: 4599
9: 4818
19: 2120
10: 3839
18: 4591
13: 5258
7: 2151
28: 3491
0: 3822
6: 5269
5: 2142
Test dataset
(39224,) (39224, 192) [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
17: 979
27: 975
26: 880
3: 12636
1: 1150
6: 1317
14: 1023
24: 937
0: 955
22: 997
4: 1035
18: 1148
28: 873
13: 1315
21: 473
9: 1205
10: 960
16: 966
23: 905
7: 538
15: 908
5: 536
12: 1202
8: 1275
25: 780
2: 1328
19: 530
20: 1086
11: 312
precision recall f1-score support
0 0.99 0.99 0.99 955
1 0.96 0.94 0.95 1150
2 0.97 0.98 0.98 1328
3 1.00 1.00 1.00 12636
4 0.98 0.94 0.96 1035
5 0.96 0.96 0.96 536
6 0.99 0.99 0.99 1317
7 0.95 0.94 0.94 538
8 0.99 0.99 0.99 1275
9 0.99 0.99 0.99 1205
10 0.97 0.96 0.97 960
11 0.96 0.93 0.95 312
12 0.98 0.98 0.98 1202
13 1.00 0.99 0.99 1315
14 0.98 1.00 0.99 1023
15 0.97 0.95 0.96 908
16 0.99 0.99 0.99 966
17 0.99 1.00 0.99 979
18 0.90 0.95 0.92 1148
19 0.94 0.92 0.93 530
20 0.95 0.96 0.96 1086
21 0.95 0.96 0.95 473
22 0.99 0.99 0.99 997
23 0.73 0.72 0.72 905
24 0.95 0.96 0.96 937
25 0.99 0.98 0.99 780
26 0.95 0.95 0.95 880
27 0.77 0.76 0.77 975
28 0.92 0.91 0.92 873
accuracy 0.97 39224
macro avg 0.95 0.95 0.95 39224
weighted avg 0.97 0.97 0.97 39224
Accuracy: 0.9686
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.999618 0.991623 0.999817 0.992662 0.999791
CLTC_WT_Untreated 0.997145 0.940870 0.998844 0.960924 0.998215
Calreticulin_WT_Untreated 0.998394 0.981175 0.998997 0.971663 0.999340
DAPI_WT_Untreated 0.999694 1.000000 0.999549 0.999051 1.000000
DCP1A_WT_Untreated 0.998011 0.943961 0.999476 0.979940 0.998483
FMRP_WT_Untreated 0.998929 0.964552 0.999406 0.957407 0.999509
FUS_WT_Untreated 0.999159 0.985573 0.999631 0.989329 0.999499
G3BP1_WT_Untreated 0.998470 0.940520 0.999276 0.947566 0.999173
GM130_WT_Untreated 0.999312 0.989020 0.999657 0.989796 0.999631
HNRNPA1_WT_Untreated 0.999159 0.985892 0.999579 0.986711 0.999553
KIF5A_WT_Untreated 0.998368 0.961458 0.999294 0.971579 0.999033
LAMP1_WT_Untreated 0.999159 0.932692 0.999692 0.960396 0.999460
LSM14A_WT_Untreated 0.998955 0.983361 0.999448 0.982544 0.999474
NCL_WT_Untreated 0.999618 0.993156 0.999842 0.995427 0.999763
NEMO_WT_Untreated 0.999439 0.997067 0.999503 0.981713 0.999921
NONO_WT_Untreated 0.998088 0.948238 0.999269 0.968504 0.998774
PEX14_WT_Untreated 0.999465 0.991718 0.999660 0.986612 0.999791
PML_WT_Untreated 0.999745 0.998979 0.999765 0.990881 0.999974
PSD95_WT_Untreated 0.995309 0.947735 0.996743 0.897690 0.998422
PURA_WT_Untreated 0.998113 0.924528 0.999121 0.935115 0.998966
Phalloidin_WT_Untreated 0.997527 0.957643 0.998663 0.953254 0.998794
SNCA_WT_Untreated 0.998827 0.955603 0.999355 0.947589 0.999458
SON_WT_Untreated 0.999541 0.992979 0.999712 0.989011 0.999817
SQSTM1_WT_Untreated 0.987329 0.721547 0.993606 0.727171 0.993425
TDP43_WT_Untreated 0.998062 0.964781 0.998877 0.954593 0.999138
TIA1_WT_Untreated 0.999439 0.982051 0.999792 0.989664 0.999636
TOMM20_WT_Untreated 0.997731 0.947727 0.998879 0.950969 0.998800
Tubulin_WT_Untreated 0.988349 0.764103 0.994065 0.766461 0.993987
mitotracker_WT_Untreated 0.996278 0.912944 0.998175 0.919262 0.998019
Macro Average 0.997836 0.951776 0.998886 0.953224 0.998891
# Same held-out-split evaluation, now over the embeddings of all six batches.
# NOTE(review): the recorded output shows 821255 train / 205314 test rows,
# which sums to the six per-batch row counts, so the batches appear to be
# pooled before splitting -- confirm in utils whether the split mixes batches.
run_train_test_split_baseline(
    dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},  # library defaults
)
2025-08-19 14:19:50 INFO: [load_embeddings] multiplex=False 2025-08-19 14:19:50 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:19:50 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 14:19:50 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 14:19:55 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:19:57 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:19:59 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:19:59 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-19 14:19:59 INFO: [load_embeddings] labels shape: (196119,) 2025-08-19 14:19:59 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 14:19:59 INFO: [load_embeddings] paths shape: (196119,) 2025-08-19 14:19:59 INFO: [load_embeddings] multiplex=False 2025-08-19 14:19:59 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:19:59 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 14:19:59 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 14:20:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:20:04 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:20:05 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:20:05 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-19 14:20:05 INFO: [load_embeddings] labels shape: (141079,) 2025-08-19 14:20:05 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-19 14:20:05 INFO: [load_embeddings] paths shape: (141079,) 2025-08-19 14:20:06 INFO: [load_embeddings] multiplex=False 2025-08-19 14:20:06 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 
14:20:06 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 14:20:06 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 14:20:09 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:20:11 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:20:12 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:20:12 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-19 14:20:12 INFO: [load_embeddings] labels shape: (134336,) 2025-08-19 14:20:12 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-19 14:20:12 INFO: [load_embeddings] paths shape: (134336,) 2025-08-19 14:20:12 INFO: [load_embeddings] multiplex=False 2025-08-19 14:20:12 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:20:12 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 14:20:12 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 14:20:17 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:20:19 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:20:20 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:20:21 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-19 14:20:21 INFO: [load_embeddings] labels shape: (189079,) 2025-08-19 14:20:21 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-19 14:20:21 INFO: [load_embeddings] paths shape: (189079,) 2025-08-19 14:20:21 INFO: [load_embeddings] multiplex=False 2025-08-19 14:20:21 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:20:21 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 14:20:21 INFO: [load_embeddings] model_output_folder = 
/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 14:20:26 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:20:28 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:20:30 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:20:30 INFO: [load_embeddings] embeddings shape: (169304, 192) 2025-08-19 14:20:30 INFO: [load_embeddings] labels shape: (169304,) 2025-08-19 14:20:30 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-19 14:20:30 INFO: [load_embeddings] paths shape: (169304,) 2025-08-19 14:20:30 INFO: [load_embeddings] multiplex=False 2025-08-19 14:20:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:20:30 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 14:20:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-19 14:20:36 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:20:38 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:20:39 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:20:40 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-08-19 14:20:40 INFO: [load_embeddings] labels shape: (196652,) 2025-08-19 14:20:40 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-19 14:20:40 INFO: [load_embeddings] paths shape: (196652,)
Train dataset
(821255,) (821255, 192) [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
4: 24851
24: 19519
3: 267743
1: 23979
8: 25202
23: 18932
16: 19980
9: 24439
28: 20992
10: 20618
6: 20978
19: 9021
0: 19489
22: 21862
14: 23518
13: 25858
7: 9709
25: 16352
2: 26026
21: 10724
15: 20310
20: 22194
18: 21782
26: 20826
27: 23450
17: 19866
11: 9023
12: 24906
5: 9106
Test dataset
(205314,) (205314, 192) [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
28: 5248
0: 4872
3: 66936
22: 5465
13: 6465
8: 6300
17: 4966
25: 4088
6: 5244
23: 4733
14: 5879
4: 6213
18: 5446
1: 5995
19: 2255
24: 4880
20: 5549
15: 5078
16: 4995
2: 6507
27: 5863
26: 5207
10: 5155
12: 6226
9: 6110
5: 2276
21: 2681
11: 2255
7: 2427
precision recall f1-score support
0 0.98 0.97 0.98 4872
1 0.96 0.94 0.95 5995
2 0.93 0.94 0.94 6507
3 1.00 1.00 1.00 66936
4 0.97 0.95 0.96 6213
5 0.96 0.96 0.96 2276
6 0.94 0.95 0.95 5244
7 0.95 0.96 0.95 2427
8 0.98 0.97 0.97 6300
9 0.96 0.95 0.95 6110
10 0.97 0.97 0.97 5155
11 0.97 0.95 0.96 2255
12 0.97 0.98 0.97 6226
13 0.99 0.99 0.99 6465
14 0.98 0.99 0.98 5879
15 0.93 0.92 0.93 5078
16 0.99 0.98 0.99 4995
17 0.99 1.00 0.99 4966
18 0.92 0.95 0.94 5446
19 0.95 0.94 0.95 2255
20 0.93 0.94 0.93 5549
21 0.94 0.94 0.94 2681
22 1.00 1.00 1.00 5465
23 0.70 0.61 0.65 4733
24 0.92 0.93 0.93 4880
25 0.98 0.99 0.99 4088
26 0.95 0.96 0.95 5207
27 0.74 0.81 0.78 5863
28 0.92 0.91 0.91 5248
accuracy 0.96 205314
macro avg 0.94 0.94 0.94 205314
weighted avg 0.96 0.96 0.96 205314
Accuracy: 0.9604
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.999006 0.973933 0.999616 0.984032 0.999367
CLTC_WT_Untreated 0.996868 0.935780 0.998706 0.956033 0.998070
Calreticulin_WT_Untreated 0.995943 0.939604 0.997787 0.932865 0.998023
DAPI_WT_Untreated 0.999776 1.000000 0.999668 0.999313 1.000000
DCP1A_WT_Untreated 0.997686 0.954611 0.999031 0.968485 0.998584
FMRP_WT_Untreated 0.999206 0.964851 0.999591 0.963581 0.999606
FUS_WT_Untreated 0.997263 0.954043 0.998396 0.939707 0.998795
G3BP1_WT_Untreated 0.998865 0.956737 0.999369 0.947755 0.999482
GM130_WT_Untreated 0.998461 0.973810 0.999241 0.975978 0.999171
HNRNPA1_WT_Untreated 0.997219 0.945172 0.998815 0.960739 0.998319
KIF5A_WT_Untreated 0.998432 0.966634 0.999251 0.970777 0.999141
LAMP1_WT_Untreated 0.999079 0.950333 0.999621 0.965315 0.999449
LSM14A_WT_Untreated 0.998344 0.976068 0.999041 0.969528 0.999251
NCL_WT_Untreated 0.999445 0.988708 0.999794 0.993627 0.999633
NEMO_WT_Untreated 0.999118 0.990304 0.999378 0.979146 0.999714
NONO_WT_Untreated 0.996483 0.923395 0.998337 0.933692 0.998058
PEX14_WT_Untreated 0.999299 0.984785 0.999661 0.986365 0.999621
PML_WT_Untreated 0.999664 0.996979 0.999730 0.989211 0.999925
PSD95_WT_Untreated 0.996605 0.950239 0.997869 0.923942 0.998643
PURA_WT_Untreated 0.998851 0.940576 0.999498 0.954116 0.999340
Phalloidin_WT_Untreated 0.996420 0.942512 0.997918 0.926320 0.998402
SNCA_WT_Untreated 0.998466 0.937337 0.999275 0.944737 0.999171
SON_WT_Untreated 0.999805 0.996706 0.999890 0.995977 0.999910
SQSTM1_WT_Untreated 0.984979 0.613142 0.993753 0.698436 0.990898
TDP43_WT_Untreated 0.996479 0.927664 0.998154 0.924444 0.998239
TIA1_WT_Untreated 0.999430 0.988503 0.999652 0.982973 0.999766
TOMM20_WT_Untreated 0.997565 0.956405 0.998636 0.948030 0.998865
Tubulin_WT_Untreated 0.986586 0.809995 0.991777 0.743309 0.994400
mitotracker_WT_Untreated 0.995461 0.906250 0.997801 0.915319 0.997541
Macro Average 0.997269 0.942934 0.998595 0.943922 0.998599
# Dataset description for the Cytoself comparison model. It carries the same
# keys as the NOVA `dataset_config` defined at the top of the notebook; only
# the embeddings location differs.
Cytoself_dataset_config = {
    # Root folder holding the Cytoself model outputs.
    "path_to_embeddings": "/home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/",
    "multiplexed": False,
    # Per-batch config name template; `{batch}` is a format placeholder.
    "config_fmt": "newNeuronsD8FigureConfig_UMAP1_B{batch}",
    "config_dir": "manuscript/manuscript_figures_data_config",
}
## Baseline
# Cytoself counterpart of the NOVA baseline run at the top of the notebook:
# every argument is the same except for the dataset config.
# Per the recorded output, each fold trains on five batches and tests on the
# remaining one.
# NOTE(review): cuML's L-BFGS solver warns "max iterations reached" on every
# fold in the recorded output, so these scores come from a fit that stopped
# at the iteration cap. Passing `max_iter` through `classifier_kwargs`, or
# scaling the inputs, may be needed -- but apply the same setting to the NOVA
# run to keep the comparison fair.
# NOTE(review): the recorded output lists 28 labels here (no TIA1) versus 29
# for NOVA, so numeric class ids in the two sets of reports presumably do not
# line up one-to-one -- confirm before comparing per-class rows.
run_baseline_model(
    dataset_config=Cytoself_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},  # library defaults
)
2025-08-19 14:33:15 INFO: [load_embeddings] multiplex=False 2025-08-19 14:33:15 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:33:15 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 14:33:15 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
Loading all batches...
2025-08-19 14:33:37 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:33:43 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:33:46 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:33:47 INFO: [load_embeddings] embeddings shape: (192220, 2048) 2025-08-19 14:33:47 INFO: [load_embeddings] labels shape: (192220,) 2025-08-19 14:33:47 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:33:47 INFO: [load_embeddings] paths shape: (192220,) 2025-08-19 14:33:48 INFO: [load_embeddings] multiplex=False 2025-08-19 14:33:48 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:33:48 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 14:33:48 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:34:28 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:34:32 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:34:35 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:34:35 INFO: [load_embeddings] embeddings shape: (137464, 2048) 2025-08-19 14:34:35 INFO: [load_embeddings] labels shape: (137464,) 2025-08-19 14:34:35 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:34:35 INFO: [load_embeddings] paths shape: (137464,) 2025-08-19 14:34:36 INFO: [load_embeddings] multiplex=False 2025-08-19 14:34:36 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:34:36 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 14:34:36 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:35:12 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:35:16 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:35:18 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 
14:35:19 INFO: [load_embeddings] embeddings shape: (130788, 2048) 2025-08-19 14:35:19 INFO: [load_embeddings] labels shape: (130788,) 2025-08-19 14:35:19 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:35:19 INFO: [load_embeddings] paths shape: (130788,) 2025-08-19 14:35:20 INFO: [load_embeddings] multiplex=False 2025-08-19 14:35:20 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:35:20 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 14:35:20 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:35:44 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:35:52 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:35:56 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:35:58 INFO: [load_embeddings] embeddings shape: (185840, 2048) 2025-08-19 14:35:58 INFO: [load_embeddings] labels shape: (185840,) 2025-08-19 14:35:58 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:35:58 INFO: [load_embeddings] paths shape: (185840,) 2025-08-19 14:35:59 INFO: [load_embeddings] multiplex=False 2025-08-19 14:35:59 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:35:59 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 14:35:59 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:36:30 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:36:39 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:36:44 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:36:45 INFO: [load_embeddings] embeddings shape: (166314, 2048) 2025-08-19 14:36:45 INFO: [load_embeddings] labels shape: (166314,) 2025-08-19 14:36:45 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:36:45 INFO: 
[load_embeddings] paths shape: (166314,) 2025-08-19 14:36:46 INFO: [load_embeddings] multiplex=False 2025-08-19 14:36:46 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:36:46 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 14:36:46 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:37:16 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:37:23 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:37:27 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:37:28 INFO: [load_embeddings] embeddings shape: (193503, 2048) 2025-08-19 14:37:28 INFO: [load_embeddings] labels shape: (193503,) 2025-08-19 14:37:28 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:37:28 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [2, 3, 7, 8, 9], Testing on: [1].
=== Batch [1] ===
Train: (813909, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 19584
CLTC_WT_Untreated: 24225
Calreticulin_WT_Untreated: 25894
DAPI_WT_Untreated: 271498
DCP1A_WT_Untreated: 25887
FMRP_WT_Untreated: 8704
FUS_WT_Untreated: 19636
G3BP1_WT_Untreated: 9447
GM130_WT_Untreated: 25126
HNRNPA1_WT_Untreated: 24526
KIF5A_WT_Untreated: 20974
LAMP1_WT_Untreated: 9717
LSM14A_WT_Untreated: 25124
NCL_WT_Untreated: 25750
NEMO_WT_Untreated: 24282
NONO_WT_Untreated: 20846
PEX14_WT_Untreated: 20145
PML_WT_Untreated: 19937
PSD95_WT_Untreated: 21489
PURA_WT_Untreated: 8626
Phalloidin_WT_Untreated: 22312
SNCA_WT_Untreated: 11037
SON_WT_Untreated: 22340
SQSTM1_WT_Untreated: 19138
TDP43_WT_Untreated: 19716
TOMM20_WT_Untreated: 21633
Tubulin_WT_Untreated: 24440
mitotracker_WT_Untreated: 21876
[W] [14:38:36.025392] L-BFGS: max iterations reached
[W] [14:38:36.026564] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.99 0.98 0.99 4777
1 0.94 0.99 0.96 5749
2 0.99 0.99 0.99 6639
3 1.00 1.00 1.00 63181
4 0.94 0.97 0.96 5177
5 0.97 0.94 0.96 2678
6 0.96 0.94 0.95 6586
7 0.99 0.90 0.94 2689
8 0.98 0.99 0.98 6376
9 0.96 0.95 0.95 6023
10 0.95 0.98 0.96 4799
11 0.95 0.89 0.92 1561
12 0.98 0.98 0.98 6008
13 0.99 0.99 0.99 6573
14 1.00 1.00 1.00 5115
15 0.93 0.97 0.95 4542
16 0.98 0.99 0.99 4830
17 1.00 0.99 0.99 4895
18 0.77 0.99 0.87 5739
19 0.95 0.87 0.91 2650
20 0.96 0.96 0.96 5431
21 0.98 0.97 0.97 2368
22 0.99 1.00 0.99 4987
23 0.82 0.76 0.79 4527
24 0.96 0.93 0.95 4683
25 0.96 0.96 0.96 4400
26 0.81 0.87 0.84 4873
27 0.96 0.58 0.72 4364
accuracy 0.96 192220
macro avg 0.95 0.94 0.94 192220
weighted avg 0.97 0.96 0.96 192220
Training on Batches: [1, 3, 7, 8, 9], Testing on: [2].
=== Batch [2] ===
Train: (868665, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (137464, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 20574
CLTC_WT_Untreated: 25663
Calreticulin_WT_Untreated: 29143
DAPI_WT_Untreated: 289648
DCP1A_WT_Untreated: 27007
FMRP_WT_Untreated: 10444
FUS_WT_Untreated: 22609
G3BP1_WT_Untreated: 11183
GM130_WT_Untreated: 28274
HNRNPA1_WT_Untreated: 26333
KIF5A_WT_Untreated: 21917
LAMP1_WT_Untreated: 9903
LSM14A_WT_Untreated: 26961
NCL_WT_Untreated: 28699
NEMO_WT_Untreated: 25445
NONO_WT_Untreated: 20846
PEX14_WT_Untreated: 21109
PML_WT_Untreated: 21075
PSD95_WT_Untreated: 22936
PURA_WT_Untreated: 10490
Phalloidin_WT_Untreated: 23886
SNCA_WT_Untreated: 10936
SON_WT_Untreated: 22776
SQSTM1_WT_Untreated: 20119
TDP43_WT_Untreated: 20703
TOMM20_WT_Untreated: 22332
Tubulin_WT_Untreated: 25416
mitotracker_WT_Untreated: 22238
[W] [14:39:41.418421] L-BFGS: max iterations reached
[W] [14:39:41.487878] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.96 1.00 0.98 3787
1 0.97 0.99 0.98 4311
2 0.96 0.97 0.96 3390
3 0.99 1.00 1.00 45031
4 0.98 0.96 0.97 4057
5 0.94 0.97 0.96 938
6 0.95 0.85 0.90 3613
7 0.90 0.98 0.94 953
8 0.98 0.99 0.98 3228
9 0.90 0.96 0.93 4216
10 0.97 0.96 0.97 3856
11 0.95 0.93 0.94 1375
12 0.96 0.98 0.97 4171
13 0.99 0.98 0.98 3624
14 1.00 0.99 0.99 3952
15 0.84 0.90 0.87 4542
16 0.99 0.99 0.99 3866
17 1.00 0.99 0.99 3757
18 0.99 0.95 0.97 4292
19 0.91 0.97 0.94 786
20 0.94 0.92 0.93 3857
21 0.98 0.96 0.97 2469
22 1.00 1.00 1.00 4551
23 0.76 0.83 0.79 3546
24 0.86 0.79 0.82 3696
25 0.94 0.93 0.94 3701
26 0.90 0.78 0.83 3897
27 0.94 0.97 0.96 4002
accuracy 0.96 137464
macro avg 0.94 0.95 0.94 137464
weighted avg 0.96 0.96 0.96 137464
Training on Batches: [1, 2, 7, 8, 9], Testing on: [3].
=== Batch [3] ===
Train: (875341, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (130788, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 20494
CLTC_WT_Untreated: 26550
Calreticulin_WT_Untreated: 28546
DAPI_WT_Untreated: 291501
DCP1A_WT_Untreated: 27588
FMRP_WT_Untreated: 9628
FUS_WT_Untreated: 22519
G3BP1_WT_Untreated: 10290
GM130_WT_Untreated: 27676
HNRNPA1_WT_Untreated: 26716
KIF5A_WT_Untreated: 22810
LAMP1_WT_Untreated: 10360
LSM14A_WT_Untreated: 27335
NCL_WT_Untreated: 28627
NEMO_WT_Untreated: 25953
NONO_WT_Untreated: 21731
PEX14_WT_Untreated: 21028
PML_WT_Untreated: 21828
PSD95_WT_Untreated: 23714
PURA_WT_Untreated: 9719
Phalloidin_WT_Untreated: 24168
SNCA_WT_Untreated: 10991
SON_WT_Untreated: 23673
SQSTM1_WT_Untreated: 20505
TDP43_WT_Untreated: 20828
TOMM20_WT_Untreated: 22202
Tubulin_WT_Untreated: 26110
mitotracker_WT_Untreated: 22251
[W] [14:40:51.721292] L-BFGS: max iterations reached
[W] [14:40:51.722460] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.94 0.99 0.97 3867
1 0.91 0.92 0.92 3424
2 0.99 0.97 0.98 3987
3 1.00 1.00 1.00 43178
4 0.96 0.93 0.94 3476
5 0.90 0.94 0.92 1754
6 0.96 0.96 0.96 3703
7 0.94 0.90 0.92 1846
8 0.97 0.99 0.98 3826
9 0.97 0.97 0.97 3833
10 0.97 0.93 0.95 2963
11 0.91 0.92 0.92 918
12 0.96 0.97 0.97 3797
13 0.99 0.99 0.99 3696
14 0.99 0.96 0.97 3444
15 0.97 0.78 0.86 3657
16 0.95 0.99 0.97 3947
17 0.99 0.98 0.99 3004
18 0.97 0.96 0.97 3514
19 0.90 0.87 0.88 1557
20 0.80 0.88 0.84 3575
21 0.95 0.98 0.96 2414
22 1.00 0.99 1.00 3654
23 0.73 0.72 0.72 3160
24 0.81 0.97 0.89 3571
25 0.82 0.96 0.89 3831
26 0.91 0.71 0.80 3203
27 0.96 0.84 0.89 3989
accuracy 0.95 130788
macro avg 0.93 0.93 0.93 130788
weighted avg 0.95 0.95 0.95 130788
Training on Batches: [1, 2, 3, 8, 9], Testing on: [7].
=== Batch [7] ===
Train: (820289, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (185840, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 19572
CLTC_WT_Untreated: 23804
Calreticulin_WT_Untreated: 26534
DAPI_WT_Untreated: 270193
DCP1A_WT_Untreated: 25278
FMRP_WT_Untreated: 9656
FUS_WT_Untreated: 26185
G3BP1_WT_Untreated: 10348
GM130_WT_Untreated: 25432
HNRNPA1_WT_Untreated: 25487
KIF5A_WT_Untreated: 20280
LAMP1_WT_Untreated: 9181
LSM14A_WT_Untreated: 25505
NCL_WT_Untreated: 26158
NEMO_WT_Untreated: 25027
NONO_WT_Untreated: 20764
PEX14_WT_Untreated: 20964
PML_WT_Untreated: 19880
PSD95_WT_Untreated: 22164
PURA_WT_Untreated: 9113
Phalloidin_WT_Untreated: 22211
SNCA_WT_Untreated: 11279
SON_WT_Untreated: 21753
SQSTM1_WT_Untreated: 18580
TDP43_WT_Untreated: 19874
TOMM20_WT_Untreated: 21012
Tubulin_WT_Untreated: 22723
mitotracker_WT_Untreated: 21332
[W] [14:42:17.417366] L-BFGS: max iterations reached
[W] [14:42:17.427980] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.99 0.98 0.99 4789
1 0.97 0.97 0.97 6170
2 0.94 0.82 0.88 5999
3 1.00 1.00 1.00 64486
4 0.94 0.87 0.90 5786
5 0.98 0.94 0.96 1726
6 0.00 0.00 0.00 37
7 0.92 0.98 0.95 1788
8 0.51 0.56 0.53 6070
9 0.95 0.13 0.22 5062
10 0.98 0.98 0.98 5493
11 0.46 0.98 0.63 2097
12 0.94 0.94 0.94 5627
13 0.98 0.20 0.33 6165
14 0.92 0.99 0.95 4370
15 0.84 0.96 0.90 4624
16 0.99 0.99 0.99 4011
17 0.89 1.00 0.94 4952
18 0.90 1.00 0.95 5064
19 0.94 0.96 0.95 2163
20 0.95 0.96 0.96 5532
21 0.93 0.95 0.94 2126
22 0.88 1.00 0.93 5574
23 0.64 0.77 0.70 5085
24 0.95 0.77 0.85 4525
25 0.99 0.95 0.97 5021
26 0.82 0.85 0.83 6590
27 0.96 0.94 0.95 4908
accuracy 0.90 185840
macro avg 0.86 0.84 0.82 185840
weighted avg 0.93 0.90 0.89 185840
Training on Batches: [1, 2, 3, 7, 9], Testing on: [8].
=== Batch [8] ===
Train: (839815, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (166314, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 20291
CLTC_WT_Untreated: 26025
Calreticulin_WT_Untreated: 26659
DAPI_WT_Untreated: 278916
DCP1A_WT_Untreated: 25323
FMRP_WT_Untreated: 9449
FUS_WT_Untreated: 20373
G3BP1_WT_Untreated: 9971
GM130_WT_Untreated: 25865
HNRNPA1_WT_Untreated: 25041
KIF5A_WT_Untreated: 22126
LAMP1_WT_Untreated: 8561
LSM14A_WT_Untreated: 25648
NCL_WT_Untreated: 26475
NEMO_WT_Untreated: 23656
NONO_WT_Untreated: 20689
PEX14_WT_Untreated: 20900
PML_WT_Untreated: 21457
PSD95_WT_Untreated: 24965
PURA_WT_Untreated: 9460
Phalloidin_WT_Untreated: 23674
SNCA_WT_Untreated: 11895
SON_WT_Untreated: 22072
SQSTM1_WT_Untreated: 20396
TDP43_WT_Untreated: 20633
TOMM20_WT_Untreated: 22224
Tubulin_WT_Untreated: 24531
mitotracker_WT_Untreated: 22540
[W] [14:43:29.225727] L-BFGS: max iterations reached
[W] [14:43:29.227037] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.98 0.98 0.98 4070
1 0.97 0.91 0.94 3949
2 0.97 0.98 0.97 5874
3 0.99 1.00 1.00 55763
4 0.97 0.98 0.98 5741
5 0.91 0.96 0.93 1933
6 0.95 0.39 0.56 5849
7 0.95 0.95 0.95 2165
8 0.99 0.73 0.84 5637
9 0.60 0.98 0.74 5508
10 0.98 0.98 0.98 3647
11 0.97 0.94 0.96 2717
12 0.97 0.98 0.98 5484
13 0.92 0.98 0.95 5848
14 0.99 1.00 1.00 5741
15 0.94 0.95 0.95 4699
16 0.92 0.98 0.95 4075
17 0.92 0.99 0.96 3375
18 0.99 0.97 0.98 2263
19 0.93 0.95 0.94 1816
20 0.92 0.93 0.92 4069
21 0.94 0.98 0.96 1510
22 0.97 1.00 0.98 5255
23 0.74 0.76 0.75 3269
24 0.95 0.92 0.94 3766
25 0.91 0.94 0.92 3809
26 0.85 0.83 0.84 4782
27 0.95 0.94 0.94 3700
accuracy 0.94 166314
macro avg 0.93 0.93 0.92 166314
weighted avg 0.95 0.94 0.94 166314
Training on Batches: [1, 2, 3, 7, 8], Testing on: [9].
=== Batch [9] ===
Train: (812626, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (193503, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 21290
CLTC_WT_Untreated: 23603
Calreticulin_WT_Untreated: 25889
DAPI_WT_Untreated: 271639
DCP1A_WT_Untreated: 24237
FMRP_WT_Untreated: 9029
FUS_WT_Untreated: 19788
G3BP1_WT_Untreated: 9441
GM130_WT_Untreated: 25137
HNRNPA1_WT_Untreated: 24642
KIF5A_WT_Untreated: 20758
LAMP1_WT_Untreated: 8668
LSM14A_WT_Untreated: 25087
NCL_WT_Untreated: 25906
NEMO_WT_Untreated: 22622
NONO_WT_Untreated: 22064
PEX14_WT_Untreated: 20729
PML_WT_Untreated: 19983
PSD95_WT_Untreated: 20872
PURA_WT_Untreated: 8972
Phalloidin_WT_Untreated: 22464
SNCA_WT_Untreated: 10887
SON_WT_Untreated: 24021
SQSTM1_WT_Untreated: 19587
TDP43_WT_Untreated: 20241
TOMM20_WT_Untreated: 20762
Tubulin_WT_Untreated: 23345
mitotracker_WT_Untreated: 20963
[W] [14:44:45.044685] L-BFGS: max iterations reached
[W] [14:44:45.045819] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.99 0.84 0.91 3071
1 0.99 0.96 0.98 6371
2 0.98 0.98 0.98 6644
3 1.00 1.00 1.00 63040
4 0.97 0.99 0.98 6827
5 0.99 0.98 0.98 2353
6 0.89 0.92 0.90 6434
7 0.96 0.98 0.97 2695
8 0.99 0.99 0.99 6365
9 0.94 0.86 0.90 5907
10 0.96 0.98 0.97 5015
11 0.96 0.98 0.97 2610
12 0.99 0.98 0.98 6045
13 0.99 0.99 0.99 6417
14 0.99 1.00 0.99 6775
15 0.84 0.77 0.80 3324
16 0.99 0.97 0.98 4246
17 0.99 1.00 0.99 4849
18 0.99 0.93 0.96 6356
19 0.97 0.93 0.95 2304
20 0.93 0.95 0.94 5279
21 0.87 0.94 0.90 2518
22 1.00 1.00 1.00 3306
23 0.80 0.48 0.60 4078
24 0.83 0.88 0.85 4158
25 0.98 0.94 0.96 5271
26 0.72 0.94 0.82 5968
27 0.88 0.97 0.93 5277
accuracy 0.96 193503
macro avg 0.94 0.93 0.93 193503
weighted avg 0.96 0.96 0.96 193503
=== Overall Accuracy ===
0.9451051678470289 [0.963271251690771, 0.9605569458185416, 0.9519221946967612, 0.8955068876452863, 0.9417968421179215, 0.9575768851128923]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.998684 0.968433 0.999435 0.977016 0.999217
CLTC_WT_Untreated 0.997746 0.961433 0.998861 0.962847 0.998816
Calreticulin_WT_Untreated 0.997439 0.949928 0.999026 0.970237 0.998328
DAPI_WT_Untreated 0.998120 0.998473 0.997943 0.995884 0.999238
DCP1A_WT_Untreated 0.997252 0.951616 0.998706 0.959057 0.998459
FMRP_WT_Untreated 0.998912 0.955368 0.999410 0.948783 0.999489
FUS_WT_Untreated 0.989038 0.802647 0.994026 0.782387 0.994715
G3BP1_WT_Untreated 0.998745 0.944463 0.999407 0.951124 0.999322
GM130_WT_Untreated 0.991966 0.861660 0.996178 0.879329 0.995531
HNRNPA1_WT_Untreated 0.989335 0.805296 0.995098 0.837253 0.993910
KIF5A_WT_Untreated 0.998378 0.970434 0.999113 0.966385 0.999223
LAMP1_WT_Untreated 0.996608 0.947508 0.997164 0.791145 0.999404
LSM14A_WT_Untreated 0.998151 0.972890 0.998958 0.967544 0.999134
NCL_WT_Untreated 0.994034 0.837175 0.999240 0.973381 0.994620
NEMO_WT_Untreated 0.999170 0.990033 0.999445 0.981718 0.999700
NONO_WT_Untreated 0.994671 0.898692 0.997155 0.891041 0.997377
PEX14_WT_Untreated 0.998949 0.986667 0.999262 0.971458 0.999660
PML_WT_Untreated 0.998828 0.993194 0.998971 0.960659 0.999828
PSD95_WT_Untreated 0.996603 0.966432 0.997442 0.913110 0.999065
PURA_WT_Untreated 0.998418 0.920007 0.999306 0.937636 0.999094
Phalloidin_WT_Untreated 0.996073 0.937318 0.997739 0.921605 0.998222
SNCA_WT_Untreated 0.998665 0.961209 0.999171 0.939962 0.999476
SON_WT_Untreated 0.998893 0.997219 0.998940 0.963307 0.999922
SQSTM1_WT_Untreated 0.987360 0.718149 0.993845 0.737566 0.993215
TDP43_WT_Untreated 0.994420 0.875241 0.997382 0.892581 0.996901
TOMM20_WT_Untreated 0.996973 0.949334 0.998238 0.934685 0.998654
Tubulin_WT_Untreated 0.989865 0.842868 0.994276 0.815466 0.995280
mitotracker_WT_Untreated 0.995232 0.876944 0.998400 0.936206 0.996710
Macro Average 0.996019 0.922880 0.997933 0.919978 0.997947
## Baseline: Cytoself embeddings, classifier trained on batch 1 only
# With train_specific_batches=[1] the run log below reports
# "Training on Batches: [1], Testing on: [k]" for k in 2, 3, 7, 8, 9,
# i.e. one model fit per held-out test batch.
# NOTE(review): every fold in the captured output logs
# "L-BFGS: max iterations reached"; the solver did not converge with the
# default estimator settings and unscaled inputs -- confirm this is acceptable
# for a baseline before comparing against other runs.
run_baseline_model(
    dataset_config=Cytoself_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,  # presumably unused while choose_features=False -- TODO confirm
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},  # empty: estimator is built with its own defaults
    train_specific_batches=[1],
    # presumably the file the per-run results are written to -- verify in utils
    results_csv="classification_results-indi.csv",
)
2025-08-20 22:15:17 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] Init (log path: /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/logs/200825_221517_523876_98867_galavir_sysdashboardsysjupyter.log; JOBID: 98867 Username: galavir) JOBNAME: sysdashboardsysjupyter 2025-08-20 22:15:17 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] NOVA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA, NOVA_DATA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA/input 2025-08-20 22:15:17 INFO: [load_embeddings] multiplex=False 2025-08-20 22:15:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:15:17 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-20 22:15:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
Loading all batches...
2025-08-20 22:15:52 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:15:59 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:16:02 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:16:04 INFO: [load_embeddings] embeddings shape: (192220, 2048) 2025-08-20 22:16:04 INFO: [load_embeddings] labels shape: (192220,) 2025-08-20 22:16:04 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:16:04 INFO: [load_embeddings] paths shape: (192220,) 2025-08-20 22:16:04 INFO: [load_embeddings] multiplex=False 2025-08-20 22:16:04 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:16:04 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-20 22:16:04 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:16:28 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:16:33 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:16:35 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:16:36 INFO: [load_embeddings] embeddings shape: (137464, 2048) 2025-08-20 22:16:36 INFO: [load_embeddings] labels shape: (137464,) 2025-08-20 22:16:36 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:16:36 INFO: [load_embeddings] paths shape: (137464,) 2025-08-20 22:16:37 INFO: [load_embeddings] multiplex=False 2025-08-20 22:16:37 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:16:37 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-20 22:16:37 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:17:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:17:07 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:17:10 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 
22:17:11 INFO: [load_embeddings] embeddings shape: (130788, 2048) 2025-08-20 22:17:11 INFO: [load_embeddings] labels shape: (130788,) 2025-08-20 22:17:11 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:17:11 INFO: [load_embeddings] paths shape: (130788,) 2025-08-20 22:17:11 INFO: [load_embeddings] multiplex=False 2025-08-20 22:17:11 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:17:11 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-20 22:17:11 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:17:46 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:17:53 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:17:56 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:17:57 INFO: [load_embeddings] embeddings shape: (185840, 2048) 2025-08-20 22:17:57 INFO: [load_embeddings] labels shape: (185840,) 2025-08-20 22:17:57 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:17:57 INFO: [load_embeddings] paths shape: (185840,) 2025-08-20 22:17:58 INFO: [load_embeddings] multiplex=False 2025-08-20 22:17:58 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:17:58 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-20 22:17:58 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:18:37 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:18:44 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:18:48 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:18:49 INFO: [load_embeddings] embeddings shape: (166314, 2048) 2025-08-20 22:18:49 INFO: [load_embeddings] labels shape: (166314,) 2025-08-20 22:18:49 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:18:49 INFO: 
[load_embeddings] paths shape: (166314,) 2025-08-20 22:18:50 INFO: [load_embeddings] multiplex=False 2025-08-20 22:18:50 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:18:50 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-20 22:18:50 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:19:26 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:19:34 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:19:38 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:19:39 INFO: [load_embeddings] embeddings shape: (193503, 2048) 2025-08-20 22:19:39 INFO: [load_embeddings] labels shape: (193503,) 2025-08-20 22:19:39 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:19:39 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].
=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (137464, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:20:05.026860] L-BFGS: max iterations reached
[W] [22:20:05.045447] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.96 0.99 0.97 3787
1 0.96 0.97 0.96 4311
2 0.94 0.95 0.95 3390
3 0.99 1.00 1.00 45031
4 0.97 0.94 0.96 4057
5 0.87 0.96 0.91 938
6 0.96 0.78 0.86 3613
7 0.75 0.97 0.85 953
8 0.99 0.98 0.98 3228
9 0.86 0.98 0.91 4216
10 0.97 0.94 0.95 3856
11 0.92 0.88 0.90 1375
12 0.95 0.97 0.96 4171
13 0.99 0.98 0.99 3624
14 1.00 0.99 0.99 3952
15 0.93 0.91 0.92 4542
16 0.97 0.99 0.98 3866
17 0.99 1.00 0.99 3757
18 0.99 0.95 0.97 4292
19 0.85 0.88 0.87 786
20 0.91 0.91 0.91 3857
21 0.97 0.96 0.97 2469
22 0.99 1.00 1.00 4551
23 0.72 0.75 0.73 3546
24 0.90 0.91 0.91 3696
25 0.92 0.93 0.92 3701
26 0.87 0.75 0.80 3897
27 0.95 0.97 0.96 4002
accuracy 0.96 137464
macro avg 0.93 0.94 0.93 137464
weighted avg 0.96 0.96 0.96 137464
Training on Batches: [1], Testing on: [3].
=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (130788, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:20:25.653320] L-BFGS: max iterations reached
[W] [22:20:25.654092] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.96 0.99 0.97 3867
1 0.92 0.89 0.90 3424
2 0.98 0.96 0.97 3987
3 0.99 1.00 1.00 43178
4 0.94 0.92 0.93 3476
5 0.85 0.96 0.90 1754
6 0.98 0.95 0.96 3703
7 0.77 0.94 0.85 1846
8 0.98 0.96 0.97 3826
9 0.95 0.99 0.97 3833
10 0.96 0.93 0.94 2963
11 0.86 0.95 0.90 918
12 0.96 0.96 0.96 3797
13 0.99 0.99 0.99 3696
14 0.97 0.97 0.97 3444
15 0.95 0.90 0.93 3657
16 0.94 0.99 0.97 3947
17 0.98 0.98 0.98 3004
18 0.98 0.95 0.96 3514
19 0.87 0.81 0.84 1557
20 0.77 0.88 0.82 3575
21 0.96 0.97 0.96 2414
22 1.00 0.99 0.99 3654
23 0.66 0.67 0.67 3160
24 0.91 0.95 0.93 3571
25 0.69 0.94 0.79 3831
26 0.89 0.64 0.74 3203
27 0.93 0.58 0.71 3989
accuracy 0.94 130788
macro avg 0.91 0.91 0.91 130788
weighted avg 0.94 0.94 0.94 130788
Training on Batches: [1], Testing on: [7].
=== Batch [7] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (185840, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:20:55.407467] L-BFGS: max iterations reached
[W] [22:20:55.412271] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.99 0.94 0.96 4789
1 0.95 0.95 0.95 6170
2 0.57 0.76 0.65 5999
3 0.99 1.00 0.99 64486
4 0.89 0.75 0.81 5786
5 0.94 0.87 0.91 1726
6 0.00 0.00 0.00 37
7 0.72 0.93 0.81 1788
8 0.83 0.44 0.58 6070
9 0.89 0.24 0.38 5062
10 0.93 0.97 0.95 5493
11 0.27 0.80 0.41 2097
12 0.92 0.81 0.86 5627
13 0.88 0.03 0.06 6165
14 0.90 0.99 0.94 4370
15 0.72 0.86 0.79 4624
16 0.95 0.96 0.96 4011
17 0.51 0.99 0.67 4952
18 0.91 0.98 0.94 5064
19 0.84 0.75 0.80 2163
20 0.90 0.93 0.92 5532
21 0.82 0.79 0.80 2126
22 0.98 0.99 0.99 5574
23 0.58 0.65 0.61 5085
24 0.76 0.59 0.67 4525
25 0.96 0.91 0.93 5021
26 0.73 0.86 0.79 6590
27 0.74 0.18 0.29 4908
accuracy 0.84 185840
macro avg 0.79 0.75 0.73 185840
weighted avg 0.88 0.84 0.83 185840
Training on Batches: [1], Testing on: [8].
=== Batch [8] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (166314, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:21:23.669343] L-BFGS: max iterations reached
[W] [22:21:23.670251] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.98 0.96 0.97 4070
1 0.94 0.86 0.90 3949
2 0.86 0.98 0.92 5874
3 0.99 1.00 0.99 55763
4 0.94 0.97 0.96 5741
5 0.87 0.91 0.89 1933
6 0.92 0.56 0.69 5849
7 0.75 0.93 0.83 2165
8 0.99 0.65 0.79 5637
9 0.64 0.97 0.77 5508
10 0.94 0.96 0.95 3647
11 0.94 0.74 0.83 2717
12 0.93 0.97 0.95 5484
13 0.96 0.89 0.92 5848
14 0.99 1.00 0.99 5741
15 0.85 0.83 0.84 4699
16 0.77 0.98 0.86 4075
17 0.89 0.98 0.93 3375
18 0.98 0.96 0.97 2263
19 0.78 0.75 0.77 1816
20 0.86 0.92 0.89 4069
21 0.93 0.95 0.94 1510
22 0.97 1.00 0.98 5255
23 0.68 0.60 0.64 3269
24 0.82 0.82 0.82 3766
25 0.79 0.90 0.84 3809
26 0.76 0.84 0.80 4782
27 0.93 0.45 0.61 3700
accuracy 0.91 166314
macro avg 0.88 0.87 0.87 166314
weighted avg 0.92 0.91 0.91 166314
Training on Batches: [1], Testing on: [9].
=== Batch [9] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (193503, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:21:52.293098] L-BFGS: max iterations reached
[W] [22:21:52.297168] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.98 0.71 0.82 3071
1 0.97 0.95 0.96 6371
2 0.74 0.98 0.84 6644
3 0.99 1.00 0.99 63040
4 0.91 0.98 0.94 6827
5 0.97 0.95 0.96 2353
6 0.89 0.96 0.93 6434
7 0.83 0.95 0.89 2695
8 0.99 0.99 0.99 6365
9 0.89 0.88 0.89 5907
10 0.90 0.97 0.93 5015
11 0.91 0.76 0.83 2610
12 0.95 0.96 0.95 6045
13 1.00 0.90 0.95 6417
14 0.97 1.00 0.99 6775
15 0.55 0.57 0.56 3324
16 0.96 0.96 0.96 4246
17 0.96 0.99 0.97 4849
18 0.98 0.83 0.90 6356
19 0.85 0.65 0.74 2304
20 0.87 0.93 0.90 5279
21 0.79 0.82 0.81 2518
22 0.99 0.99 0.99 3306
23 0.67 0.38 0.49 4078
24 0.65 0.63 0.64 4158
25 0.95 0.90 0.92 5271
26 0.67 0.92 0.77 5968
27 0.64 0.47 0.55 5277
accuracy 0.91 193503
macro avg 0.87 0.86 0.86 193503
weighted avg 0.91 0.91 0.91 193503
=== Overall Accuracy ===
0.9107745683549316 [0.9557920619216668, 0.9395586751078081, 0.8368596642272923, 0.9097851052827783, 0.9118773352351126]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.997599 0.926879 0.999343 0.972047 0.998199
CLTC_WT_Untreated 0.996551 0.930444 0.998579 0.952582 0.997868
Calreticulin_WT_Untreated 0.988996 0.922801 0.991171 0.774504 0.997447
DAPI_WT_Untreated 0.996146 0.996335 0.996051 0.992144 0.998162
DCP1A_WT_Untreated 0.994924 0.912427 0.997635 0.926856 0.997125
FMRP_WT_Untreated 0.998167 0.927964 0.998926 0.903265 0.999221
FUS_WT_Untreated 0.988603 0.804492 0.993155 0.743948 0.995157
G3BP1_WT_Untreated 0.996070 0.941992 0.996705 0.770476 0.999317
GM130_WT_Untreated 0.992180 0.777402 0.999021 0.961980 0.992952
HNRNPA1_WT_Untreated 0.988278 0.802821 0.994040 0.807133 0.993875
KIF5A_WT_Untreated 0.997158 0.956041 0.998246 0.935130 0.998837
LAMP1_WT_Untreated 0.991436 0.799732 0.993753 0.607347 0.997571
LSM14A_WT_Untreated 0.996044 0.930664 0.998126 0.940547 0.997792
NCL_WT_Untreated 0.990524 0.714718 0.999534 0.980449 0.990761
NEMO_WT_Untreated 0.998680 0.990940 0.998918 0.965725 0.999721
NONO_WT_Untreated 0.990365 0.826633 0.994669 0.802982 0.995439
PEX14_WT_Untreated 0.997086 0.975478 0.997634 0.912769 0.999377
PML_WT_Untreated 0.993026 0.986558 0.993189 0.784344 0.999660
PSD95_WT_Untreated 0.997014 0.919680 0.999112 0.965603 0.997825
PURA_WT_Untreated 0.995798 0.747044 0.998463 0.838844 0.997294
Phalloidin_WT_Untreated 0.993840 0.916413 0.996022 0.866545 0.997640
SNCA_WT_Untreated 0.997140 0.895714 0.998534 0.893609 0.998566
SON_WT_Untreated 0.999454 0.994897 0.999583 0.985370 0.999856
SQSTM1_WT_Untreated 0.983080 0.607378 0.992127 0.650075 0.990561
TDP43_WT_Untreated 0.989883 0.767803 0.995397 0.805470 0.994242
TOMM20_WT_Untreated 0.993653 0.912495 0.995869 0.857776 0.997607
Tubulin_WT_Untreated 0.986436 0.822340 0.991516 0.750037 0.994484
mitotracker_WT_Untreated 0.984240 0.514628 0.997211 0.835969 0.986735
Macro Average 0.993299 0.865097 0.996519 0.863697 0.996546
{'Accuracy': 0.9932990051713398,
'Sensitivity': 0.8650968982184478,
'Specificity': 0.9965188218806127,
'PPV': 0.8636974101979668,
'NPV': 0.9965460659251094}
## Baseline: Cytoself embeddings, trained on batch 1 only, with apply_pca=True
# Same configuration as the single-batch baseline, except that apply_pca is
# switched on. The run log below reports "Training on Batches: [1],
# Testing on: [k]" for each remaining batch k.
# NOTE(review): the logged Train/Test shapes for this run are still
# (n, 2048), so the shapes appear to be printed before any projection is
# applied -- confirm where run_baseline_model performs the PCA step.
# NOTE(review): every fold logs "L-BFGS: max iterations reached"; the solver
# did not converge with the default estimator settings.
run_baseline_model(
    dataset_config=Cytoself_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,  # presumably unused while choose_features=False -- TODO confirm
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},  # empty: estimator is built with its own defaults
    train_specific_batches=[1],
    apply_pca=True,
)
2025-08-19 14:01:28 INFO: [load_embeddings] multiplex=False 2025-08-19 14:01:28 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:01:28 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 14:01:28 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
Loading all batches...
2025-08-19 14:01:52 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:01:57 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:02:00 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:02:01 INFO: [load_embeddings] embeddings shape: (192220, 2048) 2025-08-19 14:02:01 INFO: [load_embeddings] labels shape: (192220,) 2025-08-19 14:02:01 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:02:01 INFO: [load_embeddings] paths shape: (192220,) 2025-08-19 14:02:02 INFO: [load_embeddings] multiplex=False 2025-08-19 14:02:02 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:02:02 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 14:02:02 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:02:19 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:02:23 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:02:25 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:02:26 INFO: [load_embeddings] embeddings shape: (137464, 2048) 2025-08-19 14:02:26 INFO: [load_embeddings] labels shape: (137464,) 2025-08-19 14:02:26 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:02:26 INFO: [load_embeddings] paths shape: (137464,) 2025-08-19 14:02:26 INFO: [load_embeddings] multiplex=False 2025-08-19 14:02:26 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:02:26 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 14:02:26 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:02:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:02:49 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:02:51 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 
14:02:52 INFO: [load_embeddings] embeddings shape: (130788, 2048) 2025-08-19 14:02:52 INFO: [load_embeddings] labels shape: (130788,) 2025-08-19 14:02:52 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:02:52 INFO: [load_embeddings] paths shape: (130788,) 2025-08-19 14:02:52 INFO: [load_embeddings] multiplex=False 2025-08-19 14:02:52 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:02:52 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 14:02:52 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:03:18 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:03:23 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:03:26 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:03:28 INFO: [load_embeddings] embeddings shape: (185840, 2048) 2025-08-19 14:03:28 INFO: [load_embeddings] labels shape: (185840,) 2025-08-19 14:03:28 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:03:28 INFO: [load_embeddings] paths shape: (185840,) 2025-08-19 14:03:28 INFO: [load_embeddings] multiplex=False 2025-08-19 14:03:28 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:03:28 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 14:03:28 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:03:55 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:04:02 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:04:05 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:04:06 INFO: [load_embeddings] embeddings shape: (166314, 2048) 2025-08-19 14:04:06 INFO: [load_embeddings] labels shape: (166314,) 2025-08-19 14:04:06 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:04:06 INFO: 
[load_embeddings] paths shape: (166314,) 2025-08-19 14:04:07 INFO: [load_embeddings] multiplex=False 2025-08-19 14:04:07 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:04:07 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 14:04:07 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:04:32 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:04:38 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:04:42 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:04:43 INFO: [load_embeddings] embeddings shape: (193503, 2048) 2025-08-19 14:04:43 INFO: [load_embeddings] labels shape: (193503,) 2025-08-19 14:04:43 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:04:43 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].
=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (137464, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:05:20.534433] L-BFGS: max iterations reached
[W] [14:05:20.534632] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.92 0.99 0.96 3787
1 0.89 0.92 0.91 4311
2 0.86 0.91 0.88 3390
3 0.97 0.98 0.98 45031
4 0.93 0.87 0.90 4057
5 0.75 0.91 0.83 938
6 0.80 0.63 0.70 3613
7 0.62 0.91 0.73 953
8 0.95 0.95 0.95 3228
9 0.86 0.96 0.90 4216
10 0.96 0.90 0.93 3856
11 0.81 0.70 0.75 1375
12 0.88 0.94 0.91 4171
13 0.98 0.97 0.97 3624
14 0.99 0.99 0.99 3952
15 0.90 0.84 0.87 4542
16 0.94 0.97 0.96 3866
17 0.98 0.99 0.98 3757
18 0.97 0.86 0.91 4292
19 0.63 0.77 0.70 786
20 0.84 0.82 0.83 3857
21 0.95 0.93 0.94 2469
22 0.98 0.99 0.99 4551
23 0.58 0.63 0.61 3546
24 0.83 0.85 0.84 3696
25 0.80 0.86 0.83 3701
26 0.86 0.57 0.69 3897
27 0.86 0.92 0.89 4002
accuracy 0.91 137464
macro avg 0.87 0.88 0.87 137464
weighted avg 0.91 0.91 0.91 137464
Training on Batches: [1], Testing on: [3].
=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (130788, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:06:07.521268] L-BFGS: max iterations reached
[W] [14:06:07.521472] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.93 0.99 0.96 3867
1 0.78 0.78 0.78 3424
2 0.94 0.92 0.93 3987
3 0.98 0.98 0.98 43178
4 0.86 0.78 0.82 3476
5 0.74 0.91 0.82 1754
6 0.86 0.81 0.83 3703
7 0.64 0.82 0.72 1846
8 0.96 0.91 0.93 3826
9 0.90 0.97 0.93 3833
10 0.94 0.90 0.92 2963
11 0.72 0.84 0.77 918
12 0.89 0.93 0.91 3797
13 0.98 0.98 0.98 3696
14 0.95 0.95 0.95 3444
15 0.96 0.89 0.92 3657
16 0.89 0.97 0.93 3947
17 0.97 0.97 0.97 3004
18 0.95 0.93 0.94 3514
19 0.61 0.73 0.66 1557
20 0.66 0.76 0.71 3575
21 0.92 0.93 0.92 2414
22 0.98 0.98 0.98 3654
23 0.49 0.51 0.50 3160
24 0.90 0.94 0.92 3571
25 0.55 0.88 0.68 3831
26 0.83 0.40 0.54 3203
27 0.73 0.26 0.39 3989
accuracy 0.89 130788
macro avg 0.84 0.84 0.83 130788
weighted avg 0.89 0.89 0.88 130788
Training on Batches: [1], Testing on: [7].
=== Batch [7] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (185840, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:06:47.266808] L-BFGS: max iterations reached
[W] [14:06:47.267158] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.98 0.88 0.92 4789
1 0.92 0.89 0.91 6170
2 0.55 0.76 0.64 5999
3 0.97 0.98 0.97 64486
4 0.69 0.59 0.63 5786
5 0.92 0.75 0.83 1726
6 0.00 0.00 0.00 37
7 0.62 0.84 0.72 1788
8 0.63 0.39 0.48 6070
9 0.37 0.10 0.15 5062
10 0.88 0.95 0.91 5493
11 0.11 0.35 0.16 2097
12 0.75 0.54 0.63 5627
13 0.34 0.00 0.01 6165
14 0.84 0.98 0.90 4370
15 0.66 0.87 0.75 4624
16 0.84 0.93 0.88 4011
17 0.46 0.99 0.63 4952
18 0.91 0.96 0.94 5064
19 0.65 0.49 0.56 2163
20 0.85 0.87 0.86 5532
21 0.83 0.61 0.70 2126
22 0.94 0.99 0.96 5574
23 0.50 0.60 0.55 5085
24 0.69 0.45 0.55 4525
25 0.88 0.81 0.85 5021
26 0.71 0.80 0.75 6590
27 0.55 0.12 0.19 4908
accuracy 0.78 185840
macro avg 0.68 0.66 0.64 185840
weighted avg 0.79 0.78 0.77 185840
Training on Batches: [1], Testing on: [8].
=== Batch [8] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (166314, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:07:22.568206] L-BFGS: max iterations reached
[W] [14:07:22.568488] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.96 0.94 0.95 4070
1 0.90 0.80 0.85 3949
2 0.83 0.97 0.90 5874
3 0.96 0.97 0.96 55763
4 0.82 0.93 0.87 5741
5 0.83 0.83 0.83 1933
6 0.82 0.41 0.55 5849
7 0.66 0.84 0.74 2165
8 0.96 0.55 0.70 5637
9 0.56 0.92 0.69 5508
10 0.90 0.95 0.92 3647
11 0.79 0.34 0.47 2717
12 0.73 0.95 0.83 5484
13 0.94 0.81 0.87 5848
14 0.98 0.99 0.99 5741
15 0.82 0.75 0.78 4699
16 0.84 0.96 0.90 4075
17 0.84 0.98 0.90 3375
18 0.95 0.92 0.94 2263
19 0.57 0.59 0.58 1816
20 0.82 0.84 0.83 4069
21 0.92 0.88 0.90 1510
22 0.96 0.99 0.97 5255
23 0.56 0.57 0.57 3269
24 0.74 0.79 0.76 3766
25 0.64 0.84 0.73 3809
26 0.73 0.76 0.74 4782
27 0.80 0.23 0.36 3700
accuracy 0.86 166314
macro avg 0.82 0.80 0.79 166314
weighted avg 0.86 0.86 0.85 166314
Training on Batches: [1], Testing on: [9].
=== Batch [9] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (193503, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:07:58.072874] L-BFGS: max iterations reached
[W] [14:07:58.073265] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.92 0.60 0.73 3071
1 0.95 0.89 0.92 6371
2 0.70 0.96 0.81 6644
3 0.97 0.97 0.97 63040
4 0.78 0.95 0.86 6827
5 0.96 0.88 0.92 2353
6 0.83 0.84 0.84 6434
7 0.79 0.90 0.84 2695
8 0.97 0.94 0.95 6365
9 0.70 0.75 0.72 5907
10 0.83 0.95 0.89 5015
11 0.71 0.37 0.49 2610
12 0.85 0.93 0.89 6045
13 0.99 0.80 0.88 6417
14 0.96 1.00 0.98 6775
15 0.46 0.52 0.49 3324
16 0.89 0.90 0.90 4246
17 0.95 0.98 0.97 4849
18 0.96 0.82 0.89 6356
19 0.74 0.55 0.63 2304
20 0.81 0.85 0.83 5279
21 0.67 0.65 0.66 2518
22 0.95 0.99 0.97 3306
23 0.51 0.37 0.43 4078
24 0.55 0.53 0.54 4158
25 0.86 0.80 0.83 5271
26 0.65 0.86 0.74 5968
27 0.55 0.33 0.42 5277
accuracy 0.86 193503
macro avg 0.80 0.78 0.78 193503
weighted avg 0.86 0.86 0.85 193503
=== Overall Accuracy ===
0.8586465820142102 [0.9125880230460339, 0.8866256843135456, 0.7801657339647008, 0.8551174284786609, 0.8587360402681096]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.996140 0.889910 0.998759 0.946454 0.997290
CLTC_WT_Untreated 0.993174 0.867121 0.997041 0.899884 0.995928
Calreticulin_WT_Untreated 0.986691 0.904070 0.989406 0.737137 0.996824
DAPI_WT_Untreated 0.980892 0.975149 0.983767 0.967813 0.987514
DCP1A_WT_Untreated 0.988134 0.828176 0.993389 0.804495 0.994350
FMRP_WT_Untreated 0.996744 0.852482 0.998304 0.844525 0.998405
FUS_WT_Untreated 0.985226 0.668670 0.993052 0.704059 0.991819
G3BP1_WT_Untreated 0.993596 0.857944 0.995189 0.676827 0.998327
GM130_WT_Untreated 0.988717 0.716429 0.997391 0.897403 0.991025
HNRNPA1_WT_Untreated 0.982037 0.722050 0.990115 0.694144 0.991353
KIF5A_WT_Untreated 0.995389 0.933584 0.997024 0.892439 0.998241
LAMP1_WT_Untreated 0.984480 0.447257 0.990971 0.374429 0.993306
LSM14A_WT_Untreated 0.989421 0.848989 0.993894 0.815804 0.995184
NCL_WT_Untreated 0.988560 0.660350 0.999283 0.967841 0.989017
NEMO_WT_Untreated 0.997734 0.983815 0.998162 0.942739 0.999502
NONO_WT_Untreated 0.987743 0.782980 0.993125 0.749610 0.994289
PEX14_WT_Untreated 0.995479 0.944701 0.996767 0.881187 0.998594
PML_WT_Untreated 0.991311 0.982946 0.991521 0.744312 0.999568
PSD95_WT_Untreated 0.995760 0.889944 0.998630 0.946264 0.997020
PURA_WT_Untreated 0.992192 0.595061 0.996446 0.642026 0.995666
Phalloidin_WT_Untreated 0.989742 0.834887 0.994107 0.799725 0.995340
SNCA_WT_Untreated 0.995384 0.797590 0.998103 0.852508 0.997220
SON_WT_Untreated 0.998571 0.988272 0.998862 0.960790 0.999669
SQSTM1_WT_Untreated 0.977769 0.536576 0.988393 0.526777 0.988836
TDP43_WT_Untreated 0.986749 0.696135 0.993964 0.741131 0.992468
TOMM20_WT_Untreated 0.987554 0.833680 0.991755 0.734115 0.995442
Tubulin_WT_Untreated 0.983031 0.717308 0.991257 0.717513 0.991249
mitotracker_WT_Untreated 0.979046 0.363641 0.996043 0.717378 0.982660
Macro Average 0.989545 0.789990 0.994454 0.792119 0.994504
# Single-batch baseline: run the train/test-split evaluation on batch 1 only,
# using cuML's GPU logistic regression as the classifier.
# NOTE(review): Cytoself_dataset_config is defined in an earlier cell -- confirm
# it still points at the intended embeddings before re-running.
run_train_test_split_baseline(
    Cytoself_dataset_config,
    batches=[1],
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},  # no overrides; presumably the classifier keeps its defaults
)
2025-08-19 14:17:33 INFO: [load_embeddings] multiplex=False 2025-08-19 14:17:33 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:17:33 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 14:17:33 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:17:57 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:18:02 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:18:05 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:18:06 INFO: [load_embeddings] embeddings shape: (192220, 2048) 2025-08-19 14:18:06 INFO: [load_embeddings] labels shape: (192220,) 2025-08-19 14:18:06 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:18:06 INFO: [load_embeddings] paths shape: (192220,)
Train dataset
(153776,) (153776, 2048) [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
1: 4599
22: 3990
12: 4807
23: 3622
13: 5258
26: 3898
18: 4591
3: 50545
27: 3491
14: 4092
8: 5101
5: 2142
10: 3839
4: 4142
7: 2151
16: 3864
9: 4818
21: 1894
24: 3746
19: 2120
2: 5311
0: 3822
6: 5269
20: 4345
17: 3916
25: 3520
15: 3634
11: 1249
Test dataset
(38444,) (38444, 2048) [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
1: 1150
18: 1148
3: 12636
15: 908
6: 1317
10: 960
17: 979
24: 937
14: 1023
0: 955
19: 530
13: 1315
23: 905
11: 312
7: 538
26: 975
20: 1086
12: 1201
16: 966
22: 997
25: 880
27: 873
4: 1035
5: 536
9: 1205
2: 1328
8: 1275
21: 474
[W] [14:18:21.983054] L-BFGS: max iterations reached
[W] [14:18:21.995735] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.99 0.99 0.99 955
1 0.98 0.98 0.98 1150
2 0.99 0.99 0.99 1328
3 1.00 1.00 1.00 12636
4 0.97 0.97 0.97 1035
5 0.97 0.97 0.97 536
6 0.98 0.97 0.98 1317
7 0.95 0.96 0.95 538
8 0.99 0.99 0.99 1275
9 0.99 0.98 0.98 1205
10 0.98 0.97 0.98 960
11 0.96 0.92 0.94 312
12 0.98 0.99 0.99 1201
13 1.00 0.99 0.99 1315
14 0.99 1.00 1.00 1023
15 0.96 0.95 0.96 908
16 0.99 0.99 0.99 966
17 0.99 1.00 0.99 979
18 0.98 0.98 0.98 1148
19 0.98 0.95 0.96 530
20 0.95 0.96 0.95 1086
21 0.98 0.98 0.98 474
22 0.99 0.99 0.99 997
23 0.79 0.81 0.80 905
24 0.96 0.96 0.96 937
25 0.97 0.97 0.97 880
26 0.84 0.83 0.83 975
27 0.97 0.97 0.97 873
accuracy 0.98 38444
macro avg 0.97 0.96 0.97 38444
weighted avg 0.98 0.98 0.98 38444
Accuracy: 0.9770
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.999506 0.989529 0.999760 0.990566 0.999733
CLTC_WT_Untreated 0.998595 0.975652 0.999303 0.977352 0.999249
Calreticulin_WT_Untreated 0.999324 0.990211 0.999650 0.990211 0.999650
DAPI_WT_Untreated 0.998335 0.998180 0.998411 0.996760 0.999108
DCP1A_WT_Untreated 0.998439 0.972947 0.999145 0.969201 0.999251
FMRP_WT_Untreated 0.999220 0.973881 0.999578 0.970260 0.999631
FUS_WT_Untreated 0.998335 0.974184 0.999192 0.977152 0.999084
G3BP1_WT_Untreated 0.998725 0.957249 0.999314 0.951941 0.999393
GM130_WT_Untreated 0.999428 0.994510 0.999596 0.988309 0.999812
HNRNPA1_WT_Untreated 0.999012 0.980083 0.999624 0.988285 0.999356
KIF5A_WT_Untreated 0.998803 0.972917 0.999466 0.979036 0.999306
LAMP1_WT_Untreated 0.999038 0.923077 0.999659 0.956811 0.999371
LSM14A_WT_Untreated 0.999116 0.988343 0.999463 0.983430 0.999624
NCL_WT_Untreated 0.999584 0.991635 0.999865 0.996180 0.999704
NEMO_WT_Untreated 0.999792 0.998045 0.999840 0.994158 0.999947
NONO_WT_Untreated 0.998049 0.953744 0.999121 0.963293 0.998881
PEX14_WT_Untreated 0.999558 0.989648 0.999813 0.992731 0.999733
PML_WT_Untreated 0.999688 0.995914 0.999786 0.991862 0.999893
PSD95_WT_Untreated 0.998673 0.979965 0.999249 0.975716 0.999383
PURA_WT_Untreated 0.998986 0.949057 0.999683 0.976699 0.999288
Phalloidin_WT_Untreated 0.997451 0.955801 0.998662 0.954044 0.998715
SNCA_WT_Untreated 0.999480 0.978903 0.999737 0.978903 0.999737
SON_WT_Untreated 0.999740 0.994985 0.999866 0.994985 0.999866
SQSTM1_WT_Untreated 0.990428 0.808840 0.994805 0.789644 0.995389
TDP43_WT_Untreated 0.997997 0.962647 0.998880 0.955508 0.999067
TOMM20_WT_Untreated 0.998543 0.968182 0.999255 0.968182 0.999255
Tubulin_WT_Untreated 0.991702 0.827692 0.995970 0.842380 0.995518
mitotracker_WT_Untreated 0.998517 0.966781 0.999255 0.967890 0.999228
Macro Average 0.998359 0.964736 0.999141 0.966482 0.999149
# Pooled baseline: same train/test-split evaluation, but over batches
# 1, 2, 3, 7, 8 and 9 together, again with cuML's GPU logistic regression.
# NOTE(review): Cytoself_dataset_config is defined in an earlier cell -- confirm
# it still points at the intended embeddings before re-running.
run_train_test_split_baseline(
    Cytoself_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},  # no overrides; presumably the classifier keeps its defaults
)
2025-08-19 14:21:18 INFO: [load_embeddings] multiplex=False 2025-08-19 14:21:18 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:21:18 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-19 14:21:18 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:21:31 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:21:36 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:21:38 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:21:39 INFO: [load_embeddings] embeddings shape: (192220, 2048) 2025-08-19 14:21:39 INFO: [load_embeddings] labels shape: (192220,) 2025-08-19 14:21:39 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:21:39 INFO: [load_embeddings] paths shape: (192220,) 2025-08-19 14:21:40 INFO: [load_embeddings] multiplex=False 2025-08-19 14:21:40 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:21:40 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-19 14:21:40 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:21:56 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:22:00 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:22:03 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:22:03 INFO: [load_embeddings] embeddings shape: (137464, 2048) 2025-08-19 14:22:03 INFO: [load_embeddings] labels shape: (137464,) 2025-08-19 14:22:03 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:22:03 INFO: [load_embeddings] paths shape: (137464,) 2025-08-19 14:22:04 INFO: [load_embeddings] multiplex=False 2025-08-19 14:22:04 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:22:04 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-19 14:22:04 INFO: [load_embeddings] 
model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:22:22 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:22:26 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:22:29 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:22:29 INFO: [load_embeddings] embeddings shape: (130788, 2048) 2025-08-19 14:22:29 INFO: [load_embeddings] labels shape: (130788,) 2025-08-19 14:22:29 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:22:29 INFO: [load_embeddings] paths shape: (130788,) 2025-08-19 14:22:30 INFO: [load_embeddings] multiplex=False 2025-08-19 14:22:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:22:30 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-19 14:22:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:22:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:23:00 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:23:03 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:23:04 INFO: [load_embeddings] embeddings shape: (185840, 2048) 2025-08-19 14:23:04 INFO: [load_embeddings] labels shape: (185840,) 2025-08-19 14:23:04 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:23:04 INFO: [load_embeddings] paths shape: (185840,) 2025-08-19 14:23:04 INFO: [load_embeddings] multiplex=False 2025-08-19 14:23:04 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:23:04 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-19 14:23:04 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:23:32 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:23:38 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 
2025-08-19 14:23:41 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:23:43 INFO: [load_embeddings] embeddings shape: (166314, 2048) 2025-08-19 14:23:43 INFO: [load_embeddings] labels shape: (166314,) 2025-08-19 14:23:43 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:23:43 INFO: [load_embeddings] paths shape: (166314,) 2025-08-19 14:23:43 INFO: [load_embeddings] multiplex=False 2025-08-19 14:23:43 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-19 14:23:43 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-19 14:23:43 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-19 14:24:09 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-19 14:24:15 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-19 14:24:19 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-19 14:24:20 INFO: [load_embeddings] embeddings shape: (193503, 2048) 2025-08-19 14:24:20 INFO: [load_embeddings] labels shape: (193503,) 2025-08-19 14:24:20 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-19 14:24:20 INFO: [load_embeddings] paths shape: (193503,)
Train dataset
(804903,) (804903, 2048) [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
15: 20310
3: 267743
23: 18932
16: 19980
6: 20978
14: 23518
25: 20826
9: 24439
26: 23450
19: 9021
2: 26026
20: 22194
4: 24851
27: 20992
1: 23979
22: 21862
11: 9023
21: 10724
5: 9106
12: 24906
8: 25202
18: 21782
0: 19489
7: 9709
13: 25858
17: 19866
24: 19519
10: 20618
Test dataset
(201226,) (201226, 2048) [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
20: 5549
12: 6226
25: 5207
26: 5863
23: 4733
11: 2255
3: 66936
9: 6110
17: 4966
10: 5155
4: 6213
24: 4880
21: 2681
8: 6300
14: 5879
1: 5995
6: 5244
27: 5248
18: 5446
2: 6507
0: 4872
5: 2276
22: 5465
13: 6465
16: 4995
15: 5078
19: 2255
7: 2427
[W] [14:25:36.989005] L-BFGS: max iterations reached
[W] [14:25:36.996768] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.98 0.98 0.98 4872
1 0.97 0.97 0.97 5995
2 0.98 0.97 0.97 6507
3 1.00 1.00 1.00 66936
4 0.97 0.97 0.97 6213
5 0.96 0.97 0.97 2276
6 0.89 0.90 0.90 5244
7 0.96 0.96 0.96 2427
8 0.98 0.98 0.98 6300
9 0.92 0.91 0.91 6110
10 0.98 0.97 0.98 5155
11 0.96 0.96 0.96 2255
12 0.98 0.98 0.98 6226
13 0.99 0.99 0.99 6465
14 0.99 0.99 0.99 5879
15 0.93 0.93 0.93 5078
16 0.99 0.99 0.99 4995
17 0.99 1.00 0.99 4966
18 0.98 0.98 0.98 5446
19 0.95 0.94 0.95 2255
20 0.94 0.94 0.94 5549
21 0.97 0.97 0.97 2681
22 1.00 1.00 1.00 5465
23 0.78 0.75 0.77 4733
24 0.92 0.92 0.92 4880
25 0.96 0.96 0.96 5207
26 0.84 0.86 0.85 5863
27 0.96 0.96 0.96 5248
accuracy 0.97 201226
macro avg 0.95 0.95 0.95 201226
weighted avg 0.97 0.97 0.97 201226
Accuracy: 0.9679
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.999175 0.981938 0.999603 0.983957 0.999552
CLTC_WT_Untreated 0.998400 0.974812 0.999124 0.971571 0.999226
Calreticulin_WT_Untreated 0.998350 0.972645 0.999209 0.976246 0.999086
DAPI_WT_Untreated 0.998504 0.998431 0.998540 0.997076 0.999218
DCP1A_WT_Untreated 0.998231 0.969902 0.999133 0.972720 0.999041
FMRP_WT_Untreated 0.999230 0.969684 0.999568 0.962495 0.999653
FUS_WT_Untreated 0.994722 0.904081 0.997148 0.894528 0.997433
G3BP1_WT_Untreated 0.999081 0.964153 0.999507 0.959803 0.999562
GM130_WT_Untreated 0.998882 0.982540 0.999410 0.981761 0.999436
HNRNPA1_WT_Untreated 0.994817 0.906383 0.997586 0.921618 0.997070
KIF5A_WT_Untreated 0.998852 0.974394 0.999495 0.980672 0.999327
LAMP1_WT_Untreated 0.999101 0.956098 0.999588 0.963360 0.999502
LSM14A_WT_Untreated 0.998842 0.981850 0.999385 0.980748 0.999420
NCL_WT_Untreated 0.999448 0.990719 0.999738 0.992100 0.999692
NEMO_WT_Untreated 0.999443 0.992856 0.999642 0.988150 0.999785
NONO_WT_Untreated 0.996397 0.926152 0.998216 0.930734 0.998088
PEX14_WT_Untreated 0.999448 0.987788 0.999745 0.989968 0.999689
PML_WT_Untreated 0.999742 0.995570 0.999847 0.993969 0.999888
PSD95_WT_Untreated 0.998912 0.982740 0.999362 0.977177 0.999520
PURA_WT_Untreated 0.998807 0.941907 0.999452 0.951187 0.999342
Phalloidin_WT_Untreated 0.996601 0.939268 0.998227 0.937579 0.998278
SNCA_WT_Untreated 0.999240 0.972771 0.999597 0.970238 0.999632
SON_WT_Untreated 0.999846 0.996706 0.999934 0.997619 0.999908
SQSTM1_WT_Untreated 0.989276 0.751743 0.994997 0.783528 0.994026
TDP43_WT_Untreated 0.996114 0.919057 0.998029 0.920567 0.997988
TOMM20_WT_Untreated 0.997689 0.955445 0.998811 0.955261 0.998816
Tubulin_WT_Untreated 0.990926 0.857240 0.994938 0.835578 0.995712
mitotracker_WT_Untreated 0.997818 0.961128 0.998801 0.955484 0.998959
Macro Average 0.997710 0.953857 0.998808 0.954489 0.998816
# Sweep the extra classifiers. Each entry of additional_classifiers (defined in
# an earlier cell) unpacks to a classifier class plus the kwargs handed to
# run_baseline_model for it.
for clf_class, clf_kwargs in additional_classifiers:
    # Banner so each classifier's output can be told apart in the cell log.
    print(f"\n=== Running {clf_class.__name__} ===")
    run_baseline_model(
        dataset_config=Cytoself_dataset_config,
        batches=[1, 2, 3, 7, 8, 9],
        classifier_class=clf_class,
        classifier_kwargs=clf_kwargs,
        # Train on batch 1 only; presumably the other listed batches are used
        # as separate test sets -- verify against run_baseline_model.
        train_specific_batches=[1],
        # NOTE(review): relative path, so the CSV location depends on the
        # notebook's working directory -- confirm that is intended.
        results_csv="classification_results-indi.csv",
    )
2025-08-20 22:21:58 INFO: [load_embeddings] multiplex=False 2025-08-20 22:21:58 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:21:58 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-20 22:21:58 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
=== Running LinearSVC === Loading all batches...
2025-08-20 22:22:33 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:22:39 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:22:43 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:22:44 INFO: [load_embeddings] embeddings shape: (192220, 2048) 2025-08-20 22:22:44 INFO: [load_embeddings] labels shape: (192220,) 2025-08-20 22:22:44 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:22:44 INFO: [load_embeddings] paths shape: (192220,) 2025-08-20 22:22:45 INFO: [load_embeddings] multiplex=False 2025-08-20 22:22:45 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:22:45 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-20 22:22:45 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:23:09 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:23:13 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:23:16 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:23:17 INFO: [load_embeddings] embeddings shape: (137464, 2048) 2025-08-20 22:23:17 INFO: [load_embeddings] labels shape: (137464,) 2025-08-20 22:23:17 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:23:17 INFO: [load_embeddings] paths shape: (137464,) 2025-08-20 22:23:17 INFO: [load_embeddings] multiplex=False 2025-08-20 22:23:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:23:17 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-20 22:23:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:23:43 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:23:47 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:23:50 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 
22:23:51 INFO: [load_embeddings] embeddings shape: (130788, 2048) 2025-08-20 22:23:51 INFO: [load_embeddings] labels shape: (130788,) 2025-08-20 22:23:51 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:23:51 INFO: [load_embeddings] paths shape: (130788,) 2025-08-20 22:23:51 INFO: [load_embeddings] multiplex=False 2025-08-20 22:23:51 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:23:51 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-20 22:23:51 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:24:25 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:24:31 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:24:35 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:24:36 INFO: [load_embeddings] embeddings shape: (185840, 2048) 2025-08-20 22:24:36 INFO: [load_embeddings] labels shape: (185840,) 2025-08-20 22:24:36 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:24:36 INFO: [load_embeddings] paths shape: (185840,) 2025-08-20 22:24:37 INFO: [load_embeddings] multiplex=False 2025-08-20 22:24:37 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:24:37 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-20 22:24:37 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:25:15 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:25:22 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:25:25 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:25:26 INFO: [load_embeddings] embeddings shape: (166314, 2048) 2025-08-20 22:25:26 INFO: [load_embeddings] labels shape: (166314,) 2025-08-20 22:25:26 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:25:26 INFO: 
[load_embeddings] paths shape: (166314,) 2025-08-20 22:25:27 INFO: [load_embeddings] multiplex=False 2025-08-20 22:25:27 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:25:27 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-20 22:25:27 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:26:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:26:10 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:26:14 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:26:15 INFO: [load_embeddings] embeddings shape: (193503, 2048) 2025-08-20 22:26:15 INFO: [load_embeddings] labels shape: (193503,) 2025-08-20 22:26:15 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:26:15 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded. Training on Batches: [1], Testing on: [2]. === Batch [2] === Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27] Test: (137464, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27] ANXA11_WT_Untreated: 4777 CLTC_WT_Untreated: 5749 Calreticulin_WT_Untreated: 6639 DAPI_WT_Untreated: 63181 DCP1A_WT_Untreated: 5177 FMRP_WT_Untreated: 2678 FUS_WT_Untreated: 6586 G3BP1_WT_Untreated: 2689 GM130_WT_Untreated: 6376 HNRNPA1_WT_Untreated: 6023 KIF5A_WT_Untreated: 4799 LAMP1_WT_Untreated: 1561 LSM14A_WT_Untreated: 6008 NCL_WT_Untreated: 6573 NEMO_WT_Untreated: 5115 NONO_WT_Untreated: 4542 PEX14_WT_Untreated: 4830 PML_WT_Untreated: 4895 PSD95_WT_Untreated: 5739 PURA_WT_Untreated: 2650 Phalloidin_WT_Untreated: 5431 SNCA_WT_Untreated: 2368 SON_WT_Untreated: 4987 SQSTM1_WT_Untreated: 4527 TDP43_WT_Untreated: 4683 TOMM20_WT_Untreated: 4400 Tubulin_WT_Untreated: 4873 mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn( /home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
precision recall f1-score support
0 0.95 0.99 0.97 3787
1 0.94 0.95 0.95 4311
2 0.91 0.95 0.93 3390
3 1.00 0.99 0.99 45031
4 0.96 0.91 0.94 4057
5 0.84 0.94 0.88 938
6 0.97 0.64 0.78 3613
7 0.71 0.95 0.81 953
8 0.97 0.98 0.98 3228
9 0.66 0.99 0.79 4216
10 0.96 0.93 0.94 3856
11 0.90 0.88 0.89 1375
12 0.95 0.96 0.95 4171
13 1.00 0.95 0.97 3624
14 0.99 0.99 0.99 3952
15 0.95 0.86 0.90 4542
16 0.96 0.99 0.98 3866
17 0.99 1.00 0.99 3757
18 0.98 0.92 0.95 4292
19 0.77 0.83 0.80 786
20 0.89 0.85 0.87 3857
21 0.95 0.96 0.96 2469
22 0.99 0.98 0.99 4551
23 0.72 0.68 0.70 3546
24 0.88 0.89 0.88 3696
25 0.90 0.90 0.90 3701
26 0.84 0.80 0.82 3897
27 0.94 0.96 0.95 4002
accuracy 0.94 137464
macro avg 0.91 0.92 0.91 137464
weighted avg 0.94 0.94 0.94 137464
Training on Batches: [1], Testing on: [3].
=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (130788, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn( /home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
precision recall f1-score support
0 0.95 0.99 0.97 3867
1 0.91 0.86 0.88 3424
2 0.95 0.96 0.95 3987
3 1.00 0.98 0.99 43178
4 0.94 0.88 0.91 3476
5 0.83 0.94 0.88 1754
6 0.99 0.91 0.94 3703
7 0.75 0.89 0.81 1846
8 0.98 0.96 0.97 3826
9 0.77 1.00 0.87 3833
10 0.95 0.90 0.92 2963
11 0.84 0.94 0.89 918
12 0.96 0.95 0.95 3797
13 0.99 0.97 0.98 3696
14 0.95 0.97 0.96 3444
15 0.97 0.86 0.91 3657
16 0.93 0.99 0.96 3947
17 0.97 0.99 0.98 3004
18 0.97 0.94 0.95 3514
19 0.82 0.73 0.77 1557
20 0.72 0.79 0.75 3575
21 0.93 0.96 0.95 2414
22 0.99 0.98 0.99 3654
23 0.64 0.63 0.64 3160
24 0.88 0.95 0.91 3571
25 0.73 0.91 0.81 3831
26 0.76 0.67 0.72 3203
27 0.93 0.62 0.74 3989
accuracy 0.93 130788
macro avg 0.89 0.90 0.89 130788
weighted avg 0.93 0.93 0.92 130788
Training on Batches: [1], Testing on: [7].
=== Batch [7] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (185840, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn( /home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
precision recall f1-score support
0 0.99 0.93 0.96 4789
1 0.94 0.92 0.93 6170
2 0.54 0.76 0.63 5999
3 0.99 0.99 0.99 64486
4 0.89 0.68 0.77 5786
5 0.92 0.84 0.87 1726
6 0.00 0.00 0.00 37
7 0.68 0.84 0.75 1788
8 0.81 0.46 0.58 6070
9 0.62 0.30 0.40 5062
10 0.94 0.95 0.95 5493
11 0.28 0.83 0.42 2097
12 0.90 0.75 0.82 5627
13 0.65 0.01 0.01 6165
14 0.84 0.99 0.91 4370
15 0.77 0.83 0.80 4624
16 0.95 0.97 0.96 4011
17 0.50 0.99 0.66 4952
18 0.89 0.96 0.92 5064
19 0.83 0.73 0.78 2163
20 0.89 0.88 0.89 5532
21 0.60 0.77 0.68 2126
22 0.99 0.98 0.99 5574
23 0.51 0.49 0.50 5085
24 0.73 0.66 0.69 4525
25 0.96 0.86 0.90 5021
26 0.65 0.92 0.76 6590
27 0.58 0.11 0.19 4908
accuracy 0.82 185840
macro avg 0.74 0.73 0.70 185840
weighted avg 0.85 0.82 0.81 185840
Training on Batches: [1], Testing on: [8].
=== Batch [8] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (166314, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn( /home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
precision recall f1-score support
0 0.97 0.96 0.97 4070
1 0.93 0.79 0.86 3949
2 0.78 0.98 0.87 5874
3 0.99 0.97 0.98 55763
4 0.94 0.95 0.95 5741
5 0.84 0.88 0.86 1933
6 0.92 0.42 0.58 5849
7 0.75 0.89 0.81 2165
8 0.97 0.70 0.82 5637
9 0.44 0.99 0.61 5508
10 0.94 0.95 0.94 3647
11 0.93 0.75 0.83 2717
12 0.95 0.95 0.95 5484
13 0.95 0.69 0.80 5848
14 0.98 0.99 0.99 5741
15 0.88 0.79 0.83 4699
16 0.81 0.97 0.88 4075
17 0.88 0.98 0.93 3375
18 0.96 0.93 0.94 2263
19 0.80 0.75 0.77 1816
20 0.87 0.86 0.86 4069
21 0.88 0.94 0.91 1510
22 0.96 0.99 0.98 5255
23 0.57 0.45 0.50 3269
24 0.82 0.86 0.84 3766
25 0.83 0.87 0.85 3809
26 0.70 0.89 0.78 4782
27 0.91 0.42 0.57 3700
accuracy 0.88 166314
macro avg 0.86 0.84 0.84 166314
weighted avg 0.90 0.88 0.88 166314
Training on Batches: [1], Testing on: [9].
=== Batch [9] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (193503, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn( /home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
precision recall f1-score support
0 0.98 0.72 0.83 3071
1 0.97 0.93 0.95 6371
2 0.68 0.98 0.80 6644
3 0.99 0.97 0.98 63040
4 0.92 0.96 0.94 6827
5 0.96 0.94 0.95 2353
6 0.92 0.97 0.94 6434
7 0.79 0.92 0.85 2695
8 0.98 0.99 0.98 6365
9 0.64 0.94 0.76 5907
10 0.93 0.95 0.94 5015
11 0.90 0.84 0.87 2610
12 0.97 0.94 0.96 6045
13 1.00 0.74 0.85 6417
14 0.96 1.00 0.98 6775
15 0.59 0.53 0.56 3324
16 0.96 0.96 0.96 4246
17 0.96 0.99 0.97 4849
18 0.97 0.79 0.87 6356
19 0.85 0.62 0.72 2304
20 0.86 0.88 0.87 5279
21 0.76 0.82 0.79 2518
22 0.98 0.98 0.98 3306
23 0.54 0.25 0.34 4078
24 0.66 0.68 0.67 4158
25 0.95 0.86 0.91 5271
26 0.62 0.95 0.75 5968
27 0.56 0.35 0.43 5277
accuracy 0.89 193503
macro avg 0.85 0.84 0.84 193503
weighted avg 0.90 0.89 0.89 193503
=== Overall Accuracy ===
0.8914008707205114 [0.9390967817028458, 0.9251842676698168, 0.8206360309944037, 0.8810623278858064, 0.8910249453496845]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.997492 0.926930 0.999232 0.967489 0.998200
CLTC_WT_Untreated 0.995444 0.901176 0.998336 0.943227 0.996973
Calreticulin_WT_Untreated 0.986302 0.919286 0.988504 0.724340 0.997324
DAPI_WT_Untreated 0.990922 0.979326 0.996726 0.993365 0.989724
DCP1A_WT_Untreated 0.993878 0.876656 0.997728 0.926891 0.995955
FMRP_WT_Untreated 0.997693 0.906135 0.998682 0.881426 0.998985
FUS_WT_Untreated 0.988392 0.732634 0.994715 0.774107 0.993399
G3BP1_WT_Untreated 0.995185 0.895311 0.996358 0.742712 0.998768
GM130_WT_Untreated 0.992218 0.789979 0.998660 0.949440 0.993346
HNRNPA1_WT_Untreated 0.977525 0.836500 0.981906 0.589557 0.994853
KIF5A_WT_Untreated 0.996927 0.940212 0.998427 0.940526 0.998419
LAMP1_WT_Untreated 0.991675 0.828342 0.993648 0.611766 0.997917
LSM14A_WT_Untreated 0.995464 0.905588 0.998327 0.945165 0.996997
NCL_WT_Untreated 0.987508 0.615612 0.999659 0.983314 0.987593
NEMO_WT_Untreated 0.998012 0.990404 0.998246 0.945545 0.999704
NONO_WT_Untreated 0.990582 0.786002 0.995960 0.836439 0.994384
PEX14_WT_Untreated 0.997217 0.975180 0.997776 0.917562 0.999369
PML_WT_Untreated 0.992674 0.989166 0.992762 0.774344 0.999726
PSD95_WT_Untreated 0.995894 0.893387 0.998674 0.948096 0.997113
PURA_WT_Untreated 0.995298 0.712149 0.998331 0.820489 0.996921
Phalloidin_WT_Untreated 0.991936 0.858148 0.995707 0.849279 0.996001
SNCA_WT_Untreated 0.995786 0.889010 0.997254 0.816510 0.998472
SON_WT_Untreated 0.999091 0.984154 0.999512 0.982746 0.999553
SQSTM1_WT_Untreated 0.980144 0.491013 0.991922 0.594108 0.987795
TDP43_WT_Untreated 0.989904 0.797068 0.994691 0.788470 0.994961
TOMM20_WT_Untreated 0.993295 0.877687 0.996452 0.871043 0.996660
Tubulin_WT_Untreated 0.983875 0.869681 0.987411 0.681381 0.995931
mitotracker_WT_Untreated 0.982718 0.469693 0.996888 0.806515 0.985520
Macro Average 0.991895 0.844158 0.995803 0.843066 0.995734
2025-08-20 22:41:32 INFO: [load_embeddings] multiplex=False 2025-08-20 22:41:32 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:41:32 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-20 22:41:32 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
=== Running RandomForestClassifier === Loading all batches...
2025-08-20 22:42:08 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:42:14 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:42:18 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:42:19 INFO: [load_embeddings] embeddings shape: (192220, 2048) 2025-08-20 22:42:19 INFO: [load_embeddings] labels shape: (192220,) 2025-08-20 22:42:19 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:42:19 INFO: [load_embeddings] paths shape: (192220,) 2025-08-20 22:42:20 INFO: [load_embeddings] multiplex=False 2025-08-20 22:42:20 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:42:20 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-20 22:42:20 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:42:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:42:49 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:42:51 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:42:52 INFO: [load_embeddings] embeddings shape: (137464, 2048) 2025-08-20 22:42:52 INFO: [load_embeddings] labels shape: (137464,) 2025-08-20 22:42:52 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:42:52 INFO: [load_embeddings] paths shape: (137464,) 2025-08-20 22:42:53 INFO: [load_embeddings] multiplex=False 2025-08-20 22:42:53 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:42:53 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-20 22:42:53 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:43:19 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:43:24 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:43:26 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 
22:43:27 INFO: [load_embeddings] embeddings shape: (130788, 2048) 2025-08-20 22:43:27 INFO: [load_embeddings] labels shape: (130788,) 2025-08-20 22:43:27 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:43:27 INFO: [load_embeddings] paths shape: (130788,) 2025-08-20 22:43:28 INFO: [load_embeddings] multiplex=False 2025-08-20 22:43:28 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:43:28 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-20 22:43:28 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:44:02 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:44:08 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:44:12 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:44:13 INFO: [load_embeddings] embeddings shape: (185840, 2048) 2025-08-20 22:44:13 INFO: [load_embeddings] labels shape: (185840,) 2025-08-20 22:44:13 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:44:13 INFO: [load_embeddings] paths shape: (185840,) 2025-08-20 22:44:14 INFO: [load_embeddings] multiplex=False 2025-08-20 22:44:14 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:44:14 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-20 22:44:14 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:44:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:45:01 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:45:04 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:45:05 INFO: [load_embeddings] embeddings shape: (166314, 2048) 2025-08-20 22:45:05 INFO: [load_embeddings] labels shape: (166314,) 2025-08-20 22:45:05 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:45:05 INFO: 
[load_embeddings] paths shape: (166314,) 2025-08-20 22:45:06 INFO: [load_embeddings] multiplex=False 2025-08-20 22:45:06 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 22:45:06 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-20 22:45:06 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 22:45:44 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 22:45:52 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 22:45:56 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 22:45:57 INFO: [load_embeddings] embeddings shape: (193503, 2048) 2025-08-20 22:45:57 INFO: [load_embeddings] labels shape: (193503,) 2025-08-20 22:45:57 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 22:45:57 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].
=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (137464, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
precision recall f1-score support
0 0.82 0.97 0.89 3787
1 0.87 0.90 0.89 4311
2 0.78 0.84 0.81 3390
3 0.93 1.00 0.96 45031
4 0.92 0.86 0.89 4057
5 0.80 0.85 0.83 938
6 0.99 0.31 0.47 3613
7 0.59 0.88 0.71 953
8 0.87 0.98 0.92 3228
9 0.94 0.92 0.93 4216
10 0.96 0.82 0.88 3856
11 0.79 0.38 0.52 1375
12 0.87 0.93 0.90 4171
13 0.99 0.98 0.98 3624
14 0.97 1.00 0.99 3952
15 0.91 0.72 0.81 4542
16 0.92 0.99 0.95 3866
17 0.94 1.00 0.97 3757
18 0.95 0.87 0.91 4292
19 0.81 0.62 0.70 786
20 0.78 0.85 0.81 3857
21 0.99 0.82 0.90 2469
22 1.00 0.99 0.99 4551
23 0.63 0.54 0.58 3546
24 0.73 0.84 0.78 3696
25 0.75 0.85 0.80 3701
26 0.87 0.63 0.73 3897
27 0.87 0.90 0.88 4002
accuracy 0.89 137464
macro avg 0.87 0.83 0.83 137464
weighted avg 0.90 0.89 0.89 137464
Training on Batches: [1], Testing on: [3].
=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (130788, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
precision recall f1-score support
0 0.88 0.97 0.93 3867
1 0.81 0.83 0.82 3424
2 0.90 0.87 0.89 3987
3 0.95 1.00 0.97 43178
4 0.89 0.77 0.83 3476
5 0.83 0.85 0.84 1754
6 0.99 0.54 0.70 3703
7 0.61 0.84 0.71 1846
8 0.90 0.96 0.93 3826
9 0.98 0.94 0.96 3833
10 0.96 0.86 0.91 2963
11 0.71 0.66 0.68 918
12 0.91 0.91 0.91 3797
13 0.99 0.99 0.99 3696
14 0.91 0.99 0.95 3444
15 0.97 0.83 0.90 3657
16 0.87 0.99 0.93 3947
17 0.93 1.00 0.96 3004
18 0.92 0.92 0.92 3514
19 0.79 0.51 0.62 1557
20 0.60 0.83 0.70 3575
21 0.97 0.85 0.91 2414
22 1.00 0.99 0.99 3654
23 0.52 0.47 0.49 3160
24 0.86 0.91 0.89 3571
25 0.50 0.88 0.64 3831
26 0.84 0.43 0.57 3203
27 0.58 0.11 0.19 3989
accuracy 0.87 130788
macro avg 0.84 0.81 0.81 130788
weighted avg 0.88 0.87 0.87 130788
Training on Batches: [1], Testing on: [7].
=== Batch [7] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (185840, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
precision recall f1-score support
0 0.97 0.86 0.91 4789
1 0.88 0.87 0.87 6170
2 0.52 0.87 0.65 5999
3 0.94 1.00 0.97 64486
4 0.41 0.17 0.24 5786
5 0.96 0.61 0.75 1726
6 0.00 0.00 0.00 37
7 0.56 0.69 0.62 1788
8 0.39 0.37 0.38 6070
9 0.87 0.02 0.04 5062
10 0.90 0.92 0.91 5493
11 0.03 0.05 0.04 2097
12 0.49 0.13 0.21 5627
13 0.15 0.00 0.00 6165
14 0.54 1.00 0.70 4370
15 0.60 0.87 0.71 4624
16 0.90 0.96 0.93 4011
17 0.43 0.98 0.60 4952
18 0.56 0.95 0.71 5064
19 0.89 0.36 0.51 2163
20 0.76 0.91 0.83 5532
21 0.93 0.43 0.59 2126
22 1.00 0.99 0.99 5574
23 0.53 0.40 0.45 5085
24 0.75 0.32 0.45 4525
25 0.82 0.79 0.81 5021
26 0.65 0.90 0.75 6590
27 0.06 0.01 0.01 4908
accuracy 0.74 185840
macro avg 0.62 0.59 0.56 185840
weighted avg 0.73 0.74 0.71 185840
Training on Batches: [1], Testing on: [8].
=== Batch [8] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (166314, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
precision recall f1-score support
0 0.94 0.91 0.93 4070
1 0.81 0.75 0.78 3949
2 0.75 0.97 0.84 5874
3 0.90 1.00 0.94 55763
4 0.80 0.92 0.86 5741
5 0.89 0.73 0.80 1933
6 0.91 0.15 0.25 5849
7 0.61 0.69 0.64 2165
8 0.89 0.48 0.63 5637
9 0.74 0.86 0.80 5508
10 0.96 0.93 0.94 3647
11 0.59 0.09 0.15 2717
12 0.64 0.92 0.75 5484
13 0.94 0.83 0.88 5848
14 0.96 1.00 0.98 5741
15 0.79 0.65 0.71 4699
16 0.82 0.97 0.89 4075
17 0.85 0.98 0.91 3375
18 0.85 0.90 0.88 2263
19 0.85 0.46 0.60 1816
20 0.76 0.87 0.81 4069
21 0.97 0.76 0.85 1510
22 0.98 1.00 0.99 5255
23 0.57 0.39 0.46 3269
24 0.76 0.75 0.76 3766
25 0.55 0.80 0.65 3809
26 0.70 0.83 0.76 4782
27 0.48 0.06 0.10 3700
accuracy 0.83 166314
macro avg 0.79 0.74 0.73 166314
weighted avg 0.83 0.83 0.81 166314
Training on Batches: [1], Testing on: [9].
=== Batch [9] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (193503, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
precision recall f1-score support
0 0.92 0.62 0.74 3071
1 0.90 0.91 0.90 6371
2 0.58 0.98 0.73 6644
3 0.92 1.00 0.96 63040
4 0.78 0.92 0.84 6827
5 0.98 0.76 0.85 2353
6 0.92 0.60 0.73 6434
7 0.78 0.84 0.81 2695
8 0.92 0.96 0.94 6365
9 0.94 0.66 0.78 5907
10 0.85 0.94 0.89 5015
11 0.44 0.07 0.12 2610
12 0.80 0.87 0.83 6045
13 1.00 0.82 0.90 6417
14 0.93 1.00 0.96 6775
15 0.41 0.52 0.46 3324
16 0.90 0.96 0.93 4246
17 0.94 0.99 0.96 4849
18 0.85 0.88 0.87 6356
19 0.93 0.40 0.56 2304
20 0.72 0.91 0.80 5279
21 0.79 0.48 0.59 2518
22 0.98 0.99 0.99 3306
23 0.54 0.22 0.31 4078
24 0.56 0.40 0.47 4158
25 0.79 0.71 0.75 5271
26 0.63 0.92 0.75 5968
27 0.33 0.10 0.15 5277
accuracy 0.84 193503
macro avg 0.79 0.73 0.73 193503
weighted avg 0.83 0.84 0.82 193503
=== Overall Accuracy ===
0.836347164574688 [0.893172030495257, 0.8746291708719455, 0.7441239776151528, 0.83137919838378, 0.8384314455073048]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV
ANXA11_WT_Untreated 0.994766 0.876379 0.997685 0.903221 0.996954
CLTC_WT_Untreated 0.991804 0.860681 0.995826 0.863497 0.995727
Calreticulin_WT_Untreated 0.981796 0.918282 0.983884 0.651845 0.997278
DAPI_WT_Untreated 0.972702 0.999090 0.959494 0.925070 0.999526
DCP1A_WT_Untreated 0.984778 0.721752 0.993419 0.782740 0.990883
FMRP_WT_Untreated 0.996398 0.750000 0.999061 0.896211 0.997302
FUS_WT_Untreated 0.983407 0.397382 0.997895 0.823536 0.985290
G3BP1_WT_Untreated 0.992369 0.779930 0.994864 0.640696 0.997409
GM130_WT_Untreated 0.984615 0.712847 0.993272 0.771427 0.990875
HNRNPA1_WT_Untreated 0.987039 0.661013 0.997169 0.878842 0.989548
KIF5A_WT_Untreated 0.995185 0.899352 0.997720 0.912534 0.997339
LAMP1_WT_Untreated 0.984808 0.170423 0.994648 0.277852 0.990023
LSM14A_WT_Untreated 0.984426 0.731810 0.992472 0.755879 0.991466
NCL_WT_Untreated 0.989144 0.673553 0.999454 0.975807 0.989441
NEMO_WT_Untreated 0.994012 0.998600 0.993871 0.833608 0.999957
NONO_WT_Untreated 0.985179 0.727094 0.991963 0.703962 0.992820
PEX14_WT_Untreated 0.996076 0.972748 0.996668 0.881075 0.999307
PML_WT_Untreated 0.990250 0.986758 0.990337 0.719437 0.999664
PSD95_WT_Untreated 0.990644 0.904323 0.992985 0.777569 0.997394
PURA_WT_Untreated 0.993294 0.441456 0.999205 0.856115 0.994048
Phalloidin_WT_Untreated 0.987427 0.877958 0.990513 0.722868 0.996539
SNCA_WT_Untreated 0.994851 0.664673 0.999390 0.937388 0.995409
SON_WT_Untreated 0.999538 0.991853 0.999755 0.991321 0.999770
SQSTM1_WT_Untreated 0.978334 0.396227 0.992351 0.555043 0.985561
TDP43_WT_Untreated 0.985583 0.623250 0.994578 0.740509 0.990684
TOMM20_WT_Untreated 0.983939 0.799519 0.988975 0.664439 0.994495
Tubulin_WT_Untreated 0.982800 0.785966 0.988894 0.686600 0.993344
mitotracker_WT_Untreated 0.975874 0.219236 0.996773 0.652339 0.978824
Macro Average 0.987894 0.733648 0.993326 0.777908 0.993817
2025-08-20 23:49:52 INFO: [load_embeddings] multiplex=False 2025-08-20 23:49:52 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 23:49:52 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-20 23:49:52 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
=== Running ExtraTreesClassifier === Loading all batches...
2025-08-20 23:50:28 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 23:50:35 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 23:50:39 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 23:50:40 INFO: [load_embeddings] embeddings shape: (192220, 2048) 2025-08-20 23:50:40 INFO: [load_embeddings] labels shape: (192220,) 2025-08-20 23:50:40 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 23:50:40 INFO: [load_embeddings] paths shape: (192220,) 2025-08-20 23:50:40 INFO: [load_embeddings] multiplex=False 2025-08-20 23:50:40 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 23:50:40 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-20 23:50:40 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 23:51:05 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 23:51:10 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 23:51:13 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 23:51:13 INFO: [load_embeddings] embeddings shape: (137464, 2048) 2025-08-20 23:51:13 INFO: [load_embeddings] labels shape: (137464,) 2025-08-20 23:51:13 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 23:51:13 INFO: [load_embeddings] paths shape: (137464,) 2025-08-20 23:51:14 INFO: [load_embeddings] multiplex=False 2025-08-20 23:51:14 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 23:51:14 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-20 23:51:14 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 23:51:40 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 23:51:45 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 23:51:48 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 
23:51:49 INFO: [load_embeddings] embeddings shape: (130788, 2048) 2025-08-20 23:51:49 INFO: [load_embeddings] labels shape: (130788,) 2025-08-20 23:51:49 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 23:51:49 INFO: [load_embeddings] paths shape: (130788,) 2025-08-20 23:51:49 INFO: [load_embeddings] multiplex=False 2025-08-20 23:51:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 23:51:49 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-20 23:51:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 23:52:24 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 23:52:31 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 23:52:35 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 23:52:36 INFO: [load_embeddings] embeddings shape: (185840, 2048) 2025-08-20 23:52:36 INFO: [load_embeddings] labels shape: (185840,) 2025-08-20 23:52:36 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 23:52:36 INFO: [load_embeddings] paths shape: (185840,) 2025-08-20 23:52:37 INFO: [load_embeddings] multiplex=False 2025-08-20 23:52:37 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 23:52:37 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-20 23:52:37 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 23:53:16 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 23:53:23 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 23:53:27 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 23:53:28 INFO: [load_embeddings] embeddings shape: (166314, 2048) 2025-08-20 23:53:28 INFO: [load_embeddings] labels shape: (166314,) 2025-08-20 23:53:28 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 23:53:28 INFO: 
[load_embeddings] paths shape: (166314,) 2025-08-20 23:53:29 INFO: [load_embeddings] multiplex=False 2025-08-20 23:53:29 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-20 23:53:29 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-20 23:53:29 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-08-20 23:54:05 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-20 23:54:12 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-20 23:54:16 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-20 23:54:17 INFO: [load_embeddings] embeddings shape: (193503, 2048) 2025-08-20 23:54:17 INFO: [load_embeddings] labels shape: (193503,) 2025-08-20 23:54:17 INFO: [load_embeddings] example label: ANXA11_WT_Untreated 2025-08-20 23:54:17 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].
=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (137464, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
precision recall f1-score support
0 0.81 0.97 0.88 3787
1 0.90 0.91 0.90 4311
2 0.74 0.87 0.80 3390
3 0.92 1.00 0.96 45031
4 0.92 0.86 0.89 4057
5 0.84 0.83 0.84 938
6 0.99 0.20 0.33 3613
7 0.61 0.88 0.72 953
8 0.85 0.98 0.91 3228
9 0.95 0.88 0.91 4216
10 0.97 0.82 0.89 3856
11 0.83 0.32 0.46 1375
12 0.86 0.93 0.90 4171
13 0.99 0.98 0.98 3624
14 0.97 1.00 0.98 3952
15 0.92 0.72 0.80 4542
16 0.92 0.98 0.95 3866
17 0.94 1.00 0.97 3757
18 0.95 0.88 0.91 4292
19 0.89 0.52 0.65 786
20 0.77 0.87 0.82 3857
21 0.99 0.78 0.87 2469
22 1.00 0.99 0.99 4551
23 0.64 0.48 0.55 3546
24 0.74 0.81 0.78 3696
25 0.75 0.87 0.80 3701
26 0.82 0.66 0.73 3897
27 0.87 0.88 0.88 4002
accuracy 0.89 137464
macro avg 0.87 0.82 0.82 137464
weighted avg 0.89 0.89 0.88 137464
Training on Batches: [1], Testing on: [3].
=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
Test: (130788, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
# Loader settings for the embeddings stored under the "pretrained_model" output folder.
# The keys mirror the other dataset_config dicts in this notebook.
pretrained_dataset_config = dict(
    # Folder the embeddings are read from.
    path_to_embeddings="/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model",
    # Single-marker (non-multiplexed) embeddings.
    multiplexed=False,
    # Per-batch data-config name; "{batch}" is a placeholder for the batch id.
    config_fmt="newNeuronsD8FigureConfig_UMAP1_B{batch}",
    # Directory that holds the data-config module (presumably relative to NOVA_HOME; confirm).
    config_dir="manuscript/manuscript_figures_data_config",
)
## Baseline
# Baseline on the pretrained-model embeddings: fit the cuML logistic regression
# on batch 1 only, with every optional preprocessing step switched off.
run_baseline_model(
    # --- data ---
    dataset_config=pretrained_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    train_specific_batches=[1],
    label_map=None,
    # --- preprocessing (all disabled) ---
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,  # only consulted when choose_features=True
    # --- model ---
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},
    # --- output ---
    results_csv="classification_results-indi.csv",
)
# Repeat the batch-1-trained baseline for every (estimator class, constructor kwargs)
# pair in additional_classifiers, logging to the same results CSV as the baseline run.
for clf_class, clf_kwargs in additional_classifiers:
    banner = f"\n=== Running {clf_class.__name__} ==="
    print(banner)
    run_baseline_model(
        classifier_class=clf_class,
        classifier_kwargs=clf_kwargs,
        dataset_config=pretrained_dataset_config,
        batches=[1, 2, 3, 7, 8, 9],
        train_specific_batches=[1],
        results_csv="classification_results-indi.csv",
    )
2025-08-21 14:28:16 INFO: [load_embeddings] multiplex=False 2025-08-21 14:28:16 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 14:28:16 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-21 14:28:16 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
=== Running ExtraTreesClassifier === Loading all batches...
2025-08-21 14:28:26 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 14:28:28 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 14:28:29 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 14:28:30 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-21 14:28:30 INFO: [load_embeddings] labels shape: (196119,) 2025-08-21 14:28:30 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-21 14:28:30 INFO: [load_embeddings] paths shape: (196119,) 2025-08-21 14:28:30 INFO: [load_embeddings] multiplex=False 2025-08-21 14:28:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 14:28:30 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-21 14:28:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model 2025-08-21 14:28:36 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 14:28:37 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 14:28:38 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 14:28:39 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-21 14:28:39 INFO: [load_embeddings] labels shape: (141079,) 2025-08-21 14:28:39 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-21 14:28:39 INFO: [load_embeddings] paths shape: (141079,) 2025-08-21 14:28:39 INFO: [load_embeddings] multiplex=False 2025-08-21 14:28:39 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 14:28:39 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-21 14:28:39 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model 2025-08-21 14:28:46 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 14:28:47 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 14:28:48 INFO: [embeddings_utils._filter] conditions = 
['Untreated'] 2025-08-21 14:28:48 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-21 14:28:48 INFO: [load_embeddings] labels shape: (134336,) 2025-08-21 14:28:48 INFO: [load_embeddings] example label: LAMP1_WT_Untreated 2025-08-21 14:28:48 INFO: [load_embeddings] paths shape: (134336,) 2025-08-21 14:28:49 INFO: [load_embeddings] multiplex=False 2025-08-21 14:28:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 14:28:49 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-21 14:28:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model 2025-08-21 14:28:57 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 14:28:59 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 14:29:00 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 14:29:01 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-21 14:29:01 INFO: [load_embeddings] labels shape: (189079,) 2025-08-21 14:29:01 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-21 14:29:01 INFO: [load_embeddings] paths shape: (189079,) 2025-08-21 14:29:01 INFO: [load_embeddings] multiplex=False 2025-08-21 14:29:01 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 14:29:01 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-21 14:29:01 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model 2025-08-21 14:29:10 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 14:29:12 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 14:29:14 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 14:29:15 INFO: [load_embeddings] embeddings shape: (169304, 192) 2025-08-21 14:29:15 INFO: [load_embeddings] labels shape: (169304,) 2025-08-21 14:29:15 INFO: [load_embeddings] example label: 
G3BP1_WT_Untreated 2025-08-21 14:29:15 INFO: [load_embeddings] paths shape: (169304,) 2025-08-21 14:29:15 INFO: [load_embeddings] multiplex=False 2025-08-21 14:29:15 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 14:29:15 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-21 14:29:15 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model 2025-08-21 14:29:24 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Cell In[16], line 3 1 for clf_class, clf_kwargs in additional_classifiers1: 2 print(f"\n=== Running {clf_class.__name__} ===") ----> 3 run_baseline_model( 4 dataset_config=pretrained_dataset_config, 5 batches=[1, 2, 3, 7, 8, 9], 6 classifier_class=clf_class, 7 classifier_kwargs=clf_kwargs, 8 train_specific_batches=[1], 9 results_csv="classification_results-indi.csv" 10 ) File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/classifier/utils.py:292, in run_baseline_model(dataset_config, batches, balance, norm, choose_features, top_k, apply_pca, pca_components, label_map, classifier_class, classifier_kwargs, test_specific_batches, train_specific_batches, return_proba, calculate_auc, results_csv) 272 header = not os.path.exists(results_csv) 273 pd.DataFrame([row]).to_csv(results_csv, mode="a", header=header, index=False) 275 def run_baseline_model( 276 dataset_config, # dict with paths/loading settings for embeddings 277 batches=[1, 2, 3, 7, 8, 9,], # list of batch IDs to include in the experiment 278 balance=False, # whether to balance class distributions during training 279 norm=False, # whether to normalize features before training 280 choose_features=False, # whether to select top features (e.g., univariate ranking) 281 top_k=100, # number of features to keep if choose_features=True 282 apply_pca=False, # whether to reduce dimensionality with PCA 283 pca_components=50, # number of PCA components if apply_pca=True 284 label_map=None, # optional mapping to merge/remap labels, e.g. {"WT":0,"KO":1} 285 classifier_class=cuMLLogisticRegression, # classifier class to use (any sklearn/cuML-compatible estimator) 286 classifier_kwargs=dict(), # extra arguments for the classifier constructor (e.g. 
{"max_depth":10}) 287 test_specific_batches=None, # int or list: which batches to use as test folds; None = default LOOCV 288 train_specific_batches=None, # int or list: which batches to use for training; None = complement of test 289 return_proba=False, # if True, return DataFrame of predicted probabilities along with metrics 290 calculate_auc=False, # if True, compute ROC AUC for the predictions 291 results_csv=None --> 292 ): 293 accuracies = [] 294 accumulated_cm = None File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/classifier/utils.py:128, in load_all_batches(batch_ids, dataset_config) 125 config_data = load_config_file(config_path_data, 'data') 126 config_data.OUTPUTS_FOLDER = path_to_embeddings --> 128 X, y, _ = load_embeddings(path_to_embeddings, config_data) 130 if multiplexed: 131 analyzer = AnalyzerMultiplexMarkers(config_data, path_to_embeddings) File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/embeddings/embeddings_utils.py:258, in load_embeddings(model_output_folder, config_data, sample_fraction, multiplex) 256 paths = np.concatenate(paths) 257 labels = edit_labels_by_config(labels, config_data, multiplex) --> 258 filtered_labels, filtered_embeddings, filtered_paths = __filter(labels, embeddings, paths, config_data, multiplex) 260 if sample_fraction < 1.0: 261 logging.info(f"[load_embeddings] Sampling {sample_fraction*100:.1f}% of each label group (from {len(filtered_labels)} total labels)") File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/embeddings/embeddings_utils.py:377, in __filter(labels, embeddings, paths, config_data, multiplex) 375 if markers_to_exclude and (not multiplex): 376 logging.info(f"[embeddings_utils._filter] markers_to_exclude = {markers_to_exclude}") --> 377 labels, embeddings, paths = __filter_by_label_part(labels, embeddings, paths, markers_to_exclude, 378 get_markers_from_labels, include=False) 379 if markers and (not multiplex): 380 logging.info(f"[embeddings_utils._filter] 
markers = {markers}") File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/embeddings/embeddings_utils.py:423, in __filter_by_label_part(labels, embeddings, paths, filter_on, get_parts_from_labels, config_data, include) 421 indices_to_keep = np.where(~np.isin(parts_of_labels, filter_on))[0] 422 labels = labels[indices_to_keep] --> 423 embeddings = embeddings[indices_to_keep] 424 paths = paths[indices_to_keep] 425 return labels, embeddings, paths KeyboardInterrupt:
# Small grid search for the cuML logistic regression: every combination of the
# regularisation parameter C and the run_baseline_model 'balance' / 'norm'
# switches is trained on batch 1, and the best-scoring combination is reported.
Cs = [1.0, 3.0, 10.0, 30.0]  # weaker regularization for 200 features
balances = [False]  # [False, True]  # uses your run_baseline_model's 'balance'
norms = [False, True]  # uses your run_baseline_model's 'norm'
best = None  # (score, params) of the best configuration seen so far

for dataset in [dataset_config]:  # Cytoself_dataset_config, pretrained_dataset_config]:
    print(dataset)
    for C, bal, norm in itertools.product(Cs, balances, norms):
        print(C, bal, norm)
        clf_class = cuLogisticRegression
        clf_kwargs = dict(C=C)
        try:
            res = run_baseline_model(
                dataset_config=dataset,
                batches=[1, 2, 3, 7, 8, 9],
                classifier_class=clf_class,
                classifier_kwargs=clf_kwargs,
                train_specific_batches=[1],
                results_csv="classifier_test_linear_params.csv",
                norm=norm,
                balance=bal,
            )
            # Pick your metric (prefer macro F1 if available)
            score = res.get("f1_macro", res.get("accuracy"))
            if score is None:
                # NOTE(review): confirm which keys run_baseline_model returns;
                # without one of these metrics `best` can never be updated.
                print("no 'f1_macro' or 'accuracy' in result; configuration not ranked")
            elif best is None or score > best[0]:
                best = (score, {"C": C, "balance": bal, "norm": norm})
        except Exception as exc:
            # Best-effort sweep: report the failure and move on to the next
            # configuration. Catching Exception (not a bare except) lets
            # KeyboardInterrupt / SystemExit stop the sweep as the user expects.
            print(f"failed: {exc!r}")

print("Best config:", best)
2025-08-21 17:51:29 INFO: [load_embeddings] multiplex=False 2025-08-21 17:51:29 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:51:29 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-21 17:51:29 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
{'path_to_embeddings': '/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen', 'multiplexed': False, 'config_fmt': 'newNeuronsD8FigureConfig_UMAP1_B{batch}', 'config_dir': 'manuscript/manuscript_figures_data_config'}
30.0 False False
Loading all batches...
2025-08-21 17:51:38 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:51:40 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:51:42 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:51:42 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-21 17:51:42 INFO: [load_embeddings] labels shape: (196119,) 2025-08-21 17:51:42 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-21 17:51:42 INFO: [load_embeddings] paths shape: (196119,) 2025-08-21 17:51:42 INFO: [load_embeddings] multiplex=False 2025-08-21 17:51:42 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:51:42 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-21 17:51:42 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:51:48 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:51:50 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:51:51 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:51:51 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-21 17:51:51 INFO: [load_embeddings] labels shape: (141079,) 2025-08-21 17:51:51 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-21 17:51:51 INFO: [load_embeddings] paths shape: (141079,) 2025-08-21 17:51:51 INFO: [load_embeddings] multiplex=False 2025-08-21 17:51:51 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:51:51 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-21 17:51:51 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:51:58 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:51:59 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-21 17:52:00 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:52:01 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-21 17:52:01 INFO: [load_embeddings] labels shape: (134336,) 2025-08-21 17:52:01 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-21 17:52:01 INFO: [load_embeddings] paths shape: (134336,) 2025-08-21 17:52:01 INFO: [load_embeddings] multiplex=False 2025-08-21 17:52:01 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:52:01 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-21 17:52:01 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:52:09 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:52:11 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:52:13 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:52:13 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-21 17:52:13 INFO: [load_embeddings] labels shape: (189079,) 2025-08-21 17:52:13 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-21 17:52:13 INFO: [load_embeddings] paths shape: (189079,) 2025-08-21 17:52:13 INFO: [load_embeddings] multiplex=False 2025-08-21 17:52:13 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:52:13 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-21 17:52:13 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:52:23 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:52:25 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:52:27 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:52:27 INFO: [load_embeddings] embeddings shape: 
(169304, 192) 2025-08-21 17:52:27 INFO: [load_embeddings] labels shape: (169304,) 2025-08-21 17:52:27 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-21 17:52:27 INFO: [load_embeddings] paths shape: (169304,) 2025-08-21 17:52:27 INFO: [load_embeddings] multiplex=False 2025-08-21 17:52:27 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:52:27 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-21 17:52:27 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:52:36 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:52:38 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:52:40 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:52:40 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-08-21 17:52:40 INFO: [load_embeddings] labels shape: (196652,) 2025-08-21 17:52:40 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-21 17:52:40 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2, 3, 7, 8, 9].
=== Batch [2, 3, 7, 8, 9] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (830450, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
precision recall f1-score support
0 0.96 0.89 0.92 19584
1 0.91 0.91 0.91 24225
2 0.73 0.93 0.82 25894
3 1.00 1.00 1.00 271498
4 0.93 0.93 0.93 25887
5 0.92 0.97 0.94 8704
6 0.76 0.90 0.83 19636
7 0.82 0.98 0.89 9447
8 0.90 0.74 0.81 25126
9 0.91 0.77 0.84 24526
10 0.92 0.96 0.94 20974
11 0.66 0.90 0.76 9717
12 0.95 0.83 0.88 25124
13 0.98 0.75 0.85 25750
14 0.96 0.99 0.97 24282
15 0.90 0.82 0.86 20846
16 0.95 0.96 0.96 20145
17 0.77 0.99 0.87 19937
18 0.92 0.91 0.92 21489
19 0.85 0.91 0.88 8626
20 0.87 0.95 0.90 22312
21 0.75 0.85 0.79 11037
22 0.96 1.00 0.98 22340
23 0.62 0.65 0.63 19138
24 0.82 0.88 0.85 19716
25 0.94 0.96 0.95 16541
26 0.90 0.90 0.90 21633
27 0.77 0.69 0.73 24440
28 0.87 0.50 0.63 21876
accuracy 0.91 830450
macro avg 0.87 0.88 0.87 830450
weighted avg 0.91 0.91 0.91 830450
=== Overall Accuracy ===
0.9088193148293094 [0.9088193148293094]
2025-08-21 17:52:54 INFO: [load_embeddings] multiplex=False 2025-08-21 17:52:54 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:52:54 INFO: [load_embeddings] input_folders = ['batch1'] 2025-08-21 17:52:54 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1
ANXA11_WT_Untreated 0.996503 0.891493 0.999039 0.957287 0.997384 0.923219
CLTC_WT_Untreated 0.994722 0.907410 0.997346 0.911284 0.997218 0.909343
Calreticulin_WT_Untreated 0.987077 0.934773 0.988760 0.728014 0.997881 0.818538
DAPI_WT_Untreated 0.999324 0.999963 0.999014 0.997975 0.999982 0.998968
DCP1A_WT_Untreated 0.995714 0.933287 0.997723 0.929517 0.997853 0.931398
FMRP_WT_Untreated 0.998801 0.967142 0.999136 0.922217 0.999652 0.944145
FUS_WT_Untreated 0.991017 0.901966 0.993174 0.761895 0.997615 0.826034
G3BP1_WT_Untreated 0.997327 0.977771 0.997552 0.821286 0.999744 0.892723
GM130_WT_Untreated 0.989556 0.735453 0.997484 0.901195 0.991793 0.809932
HNRNPA1_WT_Untreated 0.991071 0.771141 0.997764 0.913010 0.993068 0.836100
KIF5A_WT_Untreated 0.997063 0.964861 0.997897 0.922421 0.999088 0.943164
LAMP1_WT_Untreated 0.993506 0.902130 0.994588 0.663689 0.998836 0.764755
LSM14A_WT_Untreated 0.993414 0.827973 0.998576 0.947742 0.994654 0.883819
NCL_WT_Untreated 0.991603 0.747379 0.999418 0.976259 0.991976 0.846623
NEMO_WT_Untreated 0.998436 0.992422 0.998617 0.955777 0.999771 0.973755
NONO_WT_Untreated 0.993124 0.816272 0.997678 0.900508 0.995281 0.856323
PEX14_WT_Untreated 0.997941 0.963316 0.998802 0.952348 0.999088 0.957801
PML_WT_Untreated 0.992610 0.993730 0.992582 0.767193 0.999845 0.865890
PSD95_WT_Untreated 0.995730 0.913956 0.997902 0.920467 0.997715 0.917200
PURA_WT_Untreated 0.997476 0.914677 0.998345 0.852973 0.999104 0.882748
Phalloidin_WT_Untreated 0.994621 0.945097 0.995988 0.866743 0.998480 0.904226
SNCA_WT_Untreated 0.994119 0.846697 0.996105 0.745394 0.997931 0.792823
SON_WT_Untreated 0.998885 0.997001 0.998937 0.962865 0.999917 0.979636
SQSTM1_WT_Untreated 0.982615 0.652576 0.990401 0.615920 0.991793 0.633718
TDP43_WT_Untreated 0.992738 0.884916 0.995360 0.822622 0.997196 0.852633
TIA1_WT_Untreated 0.997965 0.958346 0.998770 0.940604 0.999153 0.949392
TOMM20_WT_Untreated 0.995032 0.904313 0.997458 0.904898 0.997441 0.904606
Tubulin_WT_Untreated 0.984923 0.689484 0.993881 0.773585 0.990615 0.729118
mitotracker_WT_Untreated 0.984725 0.496343 0.997938 0.866906 0.986529 0.631261
Macro Average 0.993712 0.876962 0.996767 0.869055 0.996780 0.867582
30.0 False True
Loading all batches...
2025-08-21 17:52:59 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:53:01 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:53:02 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:53:03 INFO: [load_embeddings] embeddings shape: (196119, 192) 2025-08-21 17:53:03 INFO: [load_embeddings] labels shape: (196119,) 2025-08-21 17:53:03 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-21 17:53:03 INFO: [load_embeddings] paths shape: (196119,) 2025-08-21 17:53:03 INFO: [load_embeddings] multiplex=False 2025-08-21 17:53:03 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:53:03 INFO: [load_embeddings] input_folders = ['batch2'] 2025-08-21 17:53:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:53:07 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:53:08 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:53:09 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:53:09 INFO: [load_embeddings] embeddings shape: (141079, 192) 2025-08-21 17:53:09 INFO: [load_embeddings] labels shape: (141079,) 2025-08-21 17:53:09 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated 2025-08-21 17:53:09 INFO: [load_embeddings] paths shape: (141079,) 2025-08-21 17:53:10 INFO: [load_embeddings] multiplex=False 2025-08-21 17:53:10 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:53:10 INFO: [load_embeddings] input_folders = ['batch3'] 2025-08-21 17:53:10 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:53:13 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:53:15 INFO: [embeddings_utils._filter] cell_lines = 
['WT'] 2025-08-21 17:53:16 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:53:16 INFO: [load_embeddings] embeddings shape: (134336, 192) 2025-08-21 17:53:16 INFO: [load_embeddings] labels shape: (134336,) 2025-08-21 17:53:16 INFO: [load_embeddings] example label: TOMM20_WT_Untreated 2025-08-21 17:53:16 INFO: [load_embeddings] paths shape: (134336,) 2025-08-21 17:53:16 INFO: [load_embeddings] multiplex=False 2025-08-21 17:53:16 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:53:16 INFO: [load_embeddings] input_folders = ['batch7'] 2025-08-21 17:53:16 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:53:21 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:53:23 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:53:25 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:53:25 INFO: [load_embeddings] embeddings shape: (189079, 192) 2025-08-21 17:53:25 INFO: [load_embeddings] labels shape: (189079,) 2025-08-21 17:53:25 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-08-21 17:53:25 INFO: [load_embeddings] paths shape: (189079,) 2025-08-21 17:53:25 INFO: [load_embeddings] multiplex=False 2025-08-21 17:53:25 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:53:25 INFO: [load_embeddings] input_folders = ['batch8'] 2025-08-21 17:53:25 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:53:31 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:53:33 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:53:35 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:53:35 INFO: [load_embeddings] embeddings shape: 
(169304, 192) 2025-08-21 17:53:35 INFO: [load_embeddings] labels shape: (169304,) 2025-08-21 17:53:35 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-08-21 17:53:35 INFO: [load_embeddings] paths shape: (169304,) 2025-08-21 17:53:35 INFO: [load_embeddings] multiplex=False 2025-08-21 17:53:35 INFO: [load_embeddings] experiment_type = neuronsDay8_new 2025-08-21 17:53:35 INFO: [load_embeddings] input_folders = ['batch9'] 2025-08-21 17:53:35 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-08-21 17:53:40 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-08-21 17:53:43 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-08-21 17:53:44 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-08-21 17:53:45 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-08-21 17:53:45 INFO: [load_embeddings] labels shape: (196652,) 2025-08-21 17:53:45 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-08-21 17:53:45 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2, 3, 7, 8, 9].
=== Batch [2, 3, 7, 8, 9] ===
Train: (196119, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
Test: (830450, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
[W] [17:54:40.299559] L-BFGS: max iterations reached
[W] [17:54:40.301984] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
precision recall f1-score support
0 0.96 0.89 0.93 19584
1 0.93 0.91 0.92 24225
2 0.73 0.93 0.81 25894
3 1.00 1.00 1.00 271498
4 0.90 0.92 0.91 25887
5 0.93 0.96 0.94 8704
6 0.78 0.87 0.82 19636
7 0.81 0.97 0.88 9447
8 0.93 0.78 0.85 25126
9 0.88 0.80 0.84 24526
10 0.93 0.96 0.94 20974
11 0.69 0.89 0.78 9717
12 0.91 0.90 0.91 25124
13 0.99 0.75 0.85 25750
14 0.95 0.99 0.97 24282
15 0.89 0.84 0.86 20846
16 0.95 0.96 0.95 20145
17 0.81 0.99 0.89 19937
18 0.91 0.92 0.91 21489
19 0.81 0.91 0.86 8626
20 0.86 0.95 0.90 22312
21 0.78 0.81 0.80 11037
22 0.95 1.00 0.98 22340
23 0.62 0.66 0.64 19138
24 0.84 0.86 0.85 19716
25 0.95 0.96 0.95 16541
26 0.91 0.89 0.90 21633
27 0.77 0.70 0.74 24440
28 0.88 0.49 0.63 21876
accuracy 0.91 830450
macro avg 0.87 0.88 0.87 830450
weighted avg 0.91 0.91 0.91 830450
=== Overall Accuracy ===
0.9108374977421879 [0.9108374977421879]
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1
ANXA11_WT_Untreated 0.996666 0.894557 0.999132 0.961368 0.997458 0.926760
CLTC_WT_Untreated 0.995160 0.905428 0.997857 0.926972 0.997160 0.916073
Calreticulin_WT_Untreated 0.986734 0.925504 0.988704 0.725048 0.997581 0.813103
DAPI_WT_Untreated 0.999557 0.999919 0.999381 0.998727 0.999961 0.999323
DCP1A_WT_Untreated 0.994352 0.919033 0.996776 0.901687 0.997393 0.910277
FMRP_WT_Untreated 0.998745 0.957031 0.999187 0.925761 0.999545 0.941137
FUS_WT_Untreated 0.991137 0.868660 0.994103 0.781070 0.996811 0.822539
G3BP1_WT_Untreated 0.997012 0.971419 0.997307 0.805848 0.999670 0.880922
GM130_WT_Untreated 0.991481 0.775372 0.998223 0.931574 0.993028 0.846326
HNRNPA1_WT_Untreated 0.991021 0.800294 0.996825 0.884662 0.993940 0.840366
KIF5A_WT_Untreated 0.996994 0.956708 0.998038 0.926665 0.998877 0.941447
LAMP1_WT_Untreated 0.994056 0.893280 0.995249 0.690039 0.998732 0.778615
LSM14A_WT_Untreated 0.994396 0.901091 0.997307 0.912569 0.996916 0.906793
NCL_WT_Untreated 0.991968 0.750175 0.999705 0.987880 0.992067 0.852772
NEMO_WT_Untreated 0.998254 0.989745 0.998510 0.952405 0.999691 0.970717
NONO_WT_Untreated 0.993359 0.844527 0.997191 0.885608 0.996002 0.864579
PEX14_WT_Untreated 0.997700 0.955622 0.998746 0.949869 0.998897 0.952737
PML_WT_Untreated 0.994209 0.992476 0.994252 0.809417 0.999814 0.891648
PSD95_WT_Untreated 0.995535 0.918516 0.997581 0.909795 0.997835 0.914135
PURA_WT_Untreated 0.996902 0.910735 0.997806 0.813335 0.999062 0.859284
Phalloidin_WT_Untreated 0.994610 0.947293 0.995917 0.864953 0.998541 0.904253
SNCA_WT_Untreated 0.994443 0.811543 0.996906 0.779412 0.997460 0.795153
SON_WT_Untreated 0.998647 0.997762 0.998671 0.954032 0.999938 0.975407
SQSTM1_WT_Untreated 0.982641 0.659473 0.990264 0.615058 0.991954 0.636492
TDP43_WT_Untreated 0.992904 0.860672 0.996120 0.843599 0.996610 0.852050
TIA1_WT_Untreated 0.998071 0.955142 0.998943 0.948376 0.999088 0.951747
TOMM20_WT_Untreated 0.995038 0.894421 0.997729 0.913292 0.997178 0.903758
Tubulin_WT_Untreated 0.985200 0.702537 0.993771 0.773737 0.991005 0.736420
mitotracker_WT_Untreated 0.984884 0.492915 0.998194 0.880748 0.986442 0.632082
Macro Average 0.993851 0.877650 0.996841 0.870810 0.996850 0.869549
Best config: None
# Score every embedding dimension of batch 1 against the (integer-encoded)
# labels with f_classif, then show the top_n highest-scoring dimensions.
X_train, y_train = load_batches([1])

le = LabelEncoder()
y_encoded = le.fit_transform(y_train)

f_scores, p_values = f_classif(X_train, y_encoded)

# np.argsort is ascending, so the last top_n entries are the highest scores
# (weakest of the kept features first, strongest last).
top_n = 100
top_idx = np.argsort(f_scores)[-top_n:]

# Bar chart: one bar per kept dimension, x tick labels are the dimension indices.
fig, ax = plt.subplots(figsize=(10, 5))
bar_positions = range(top_n)
ax.bar(bar_positions, f_scores[top_idx])
ax.set_xticks(bar_positions)
ax.set_xticklabels(top_idx, rotation=45)
ax.set_ylabel("F-score")
ax.set_xlabel("Embedding dimension")
ax.set_title("Top correlated embedding features with labels")
fig.tight_layout()
plt.show()
def top_k_feature_indices(scores, top_k):
    """Return the indices of the `top_k` highest scores.

    Args:
        scores: 1-D sequence of per-feature scores (e.g. F-scores).
        top_k: number of features to keep. Values <= 0 select nothing;
            values larger than ``len(scores)`` select every feature.

    Returns:
        1-D integer array of feature indices, ordered from lowest to highest
        score among the selected ones.
    """
    scores = np.asarray(scores, dtype=float)
    if top_k <= 0:
        # `arr[-0:]` is the whole array, so a plain negative slice would
        # return every feature when none were requested.
        return np.empty(0, dtype=np.intp)
    # argsort places NaN last, which would rank NaN scores as the "best".
    # Push them to the bottom so they are only picked when nothing else is left.
    sortable = np.where(np.isnan(scores), -np.inf, scores)
    return np.argsort(sortable)[-top_k:]


def get_top_features_for_batch(batch, top_k=200):
    """Return the set of the `top_k` highest-F-score feature indices of a batch.

    Loads the single batch via `load_batches`, label-encodes its targets and
    scores every feature with `f_classif` (presumably sklearn's ANOVA F-test,
    which yields NaN for constant features -- confirm against the import).

    Args:
        batch: batch identifier passed to `load_batches` as a one-item list.
        top_k: number of features to keep; the result is smaller when the
            embedding has fewer than `top_k` dimensions.

    Returns:
        Set of feature indices (column positions in the batch's feature matrix).
    """
    X, y = load_batches([batch])
    y_encoded = LabelEncoder().fit_transform(y)
    f_scores, _ = f_classif(X, y_encoded)
    return set(top_k_feature_indices(f_scores, top_k))
batches = [1, 2, 3, 7, 8, 9]
top_k = 100

# Top-feature index set for every batch.
batch_feature_map = {batch: get_top_features_for_batch(batch, top_k) for batch in batches}

# Pairwise overlap counts. The frame is initialised with integer zeros: a frame
# built without data is filled with NaN, which an integer dtype cannot hold.
overlap_matrix = pd.DataFrame(0, index=batches, columns=batches, dtype=int)
for b1, b2 in combinations(batches, 2):
    overlap = len(batch_feature_map[b1] & batch_feature_map[b2])
    # The matrix is symmetric, so fill both triangles.
    overlap_matrix.loc[b1, b2] = overlap
    overlap_matrix.loc[b2, b1] = overlap
for b in batches:
    # A batch overlaps with itself on all of its selected features. That count
    # is below `top_k` when the embedding has fewer than `top_k` dimensions.
    overlap_matrix.loc[b, b] = len(batch_feature_map[b])

# Heatmap of the overlap counts.
plt.figure(figsize=(8, 6))
sns.heatmap(overlap_matrix, annot=True, fmt='d', cmap='Blues')
plt.title(f'Overlap of Top {top_k} Features Across Batches')
plt.xlabel("Batch")
plt.ylabel("Batch")
plt.tight_layout()
plt.show()
# Pick the two features with the highest F-scores. `top_idx` comes from an
# ascending argsort, so `top_idx[:2]` would be the two WEAKEST of the selected
# features; re-rank by score so the choice does not depend on that ordering.
best_first = top_idx[np.argsort(f_scores[top_idx])[::-1]]
feat1, feat2 = best_first[:2]
X_vis = X_train[:, [feat1, feat2]]

# Convert encoded labels back to original strings
labels_str = le.inverse_transform(y_encoded)

# Scatter plot of the two features, one colour (and legend entry) per label
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_vis[idx, 0], X_vis[idx, 1], label=label, alpha=0.7)

plt.xlabel(f'Feature {feat1}')
plt.ylabel(f'Feature {feat2}')
plt.title('Top 2 Embeddings by Label')
# Legend goes outside the axes so it does not cover the points.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
from sklearn.decomposition import PCA
import umap
# Restrict X_train to the first 100 entries of `top_idx`.
X_top = X_train[:, top_idx[:100]]

# Recover the string labels; every label gets its own colour below.
labels_str = le.inverse_transform(y_encoded)

# Reducer choice: PCA when `use_pca` is True, UMAP otherwise.
use_pca = True
reducer = (
    PCA(n_components=2, random_state=42)
    if use_pca
    else umap.UMAP(n_components=2, random_state=42)
)

# Project the selected features down to two dimensions.
X_embedded = reducer.fit_transform(X_top)

# One scatter call per label so each gets a legend entry.
method_name = "PCA" if use_pca else "UMAP"
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    label_mask = labels_str == label
    plt.scatter(
        X_embedded[label_mask, 0],
        X_embedded[label_mask, 1],
        label=label,
        alpha=0.6,
    )
plt.title("2D Projection of Top 100 Features using " + method_name)
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
# Columns of X_train named by the first 100 entries of `top_idx`.
selected_columns = top_idx[:100]
X_top = X_train[:, selected_columns]

# String form of the encoded labels, used for colouring and the legend.
labels_str = le.inverse_transform(y_encoded)

# `use_pca` switches between the two reducers; this cell runs UMAP.
use_pca = False
if not use_pca:
    reducer = umap.UMAP(n_components=2, random_state=42)
else:
    reducer = PCA(n_components=2, random_state=42)

# 2-D embedding of the selected features.
X_embedded = reducer.fit_transform(X_top)

# Plot each label as its own series.
plt.figure(figsize=(8, 6))
unique_labels, label_codes = np.unique(labels_str, return_inverse=True)
for code, label in enumerate(unique_labels):
    members = label_codes == code
    xs, ys = X_embedded[members, 0], X_embedded[members, 1]
    plt.scatter(xs, ys, label=label, alpha=0.6)
plt.title("2D Projection of Top 100 Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
# 1. Select (up to) the 1000 highest-F-score features from X_train.
#    `top_idx` only holds `top_n` indices, so slicing it with [:1000] cannot
#    return more than that; rank `f_scores` directly instead.
n_requested = 1000
feature_ranking = np.argsort(f_scores)[::-1]  # best F-score first
X_top = X_train[:, feature_ranking[:n_requested]]
# Fewer than n_requested when the embedding has fewer dimensions.
n_selected = X_top.shape[1]
# 2. Decode labels back to strings; each label gets its own colour
labels_str = le.inverse_transform(y_encoded)
# --- Option A: PCA ---
use_pca = True
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)
# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)
# 4. Plot, one series per label
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)
# Report the number of features actually used rather than a fixed "100".
plt.title(f"2D Projection of Top {n_selected} Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
# 1. Select (up to) the 1000 highest-F-score features from X_train.
#    `top_idx` only holds `top_n` indices, so slicing it with [:1000] cannot
#    return more than that; rank `f_scores` directly instead.
n_requested = 1000
feature_ranking = np.argsort(f_scores)[::-1]  # best F-score first
X_top = X_train[:, feature_ranking[:n_requested]]
# Fewer than n_requested when the embedding has fewer dimensions.
n_selected = X_top.shape[1]
# 2. Decode labels back to strings; each label gets its own colour
labels_str = le.inverse_transform(y_encoded)
# --- Option A: PCA ---
use_pca = False
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)
# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)
# 4. Plot, one series per label
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)
# Report the number of features actually used rather than a fixed "100".
plt.title(f"2D Projection of Top {n_selected} Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
# 1. Use every embedding dimension of X_train (no feature selection)
X_top = X_train
# 2. Decode labels back to strings; each label gets its own colour
labels_str = le.inverse_transform(y_encoded)
# --- Option A: PCA ---
use_pca = False
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)
# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)
# 4. Plot, one series per label
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)
# The projection uses the full feature matrix, so the title must not claim
# a "Top 100" selection.
plt.title(f"2D Projection of All {X_top.shape[1]} Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
# 1. Select the 10 highest-F-score features from X_train.
#    `top_idx` is in ascending score order, so `top_idx[:10]` would be the 10
#    weakest of the selected features; rank `f_scores` best-first instead.
n_requested = 10
feature_ranking = np.argsort(f_scores)[::-1]  # best F-score first
X_top = X_train[:, feature_ranking[:n_requested]]
# Fewer than n_requested when the embedding has fewer dimensions.
n_selected = X_top.shape[1]
# 2. Decode labels back to strings; each label gets its own colour
labels_str = le.inverse_transform(y_encoded)
# --- Option A: PCA ---
use_pca = False
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)
# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)
# 4. Plot, one series per label
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)
# Report the number of features actually used rather than a fixed "100".
plt.title(f"2D Projection of Top {n_selected} Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")